"""Module for reading Lobster output files.
For more information on LOBSTER see www.cohp.de.

If you use this module, please cite:
J. George, G. Petretto, A. Naik, M. Esters, A. J. Jackson, R. Nelson, R. Dronskowski, G.-M. Rignanese, G. Hautier,
"Automated Bonding Analysis with Crystal Orbital Hamilton Populations",
ChemPlusChem 2022, e202200123,
DOI: 10.1002/cplu.202200123.
"""

from __future__ import annotations

import collections
import fnmatch
import itertools
import os
import re
import warnings
from collections import defaultdict
from typing import TYPE_CHECKING, cast

import numpy as np
from monty.dev import deprecated
from monty.io import zopen
from monty.json import MSONable

from pymatgen.core.structure import Structure
from pymatgen.electronic_structure.bandstructure import LobsterBandStructureSymmLine
from pymatgen.electronic_structure.core import Orbital, Spin
from pymatgen.electronic_structure.dos import Dos, LobsterCompleteDos
from pymatgen.io.vasp.inputs import Kpoints
from pymatgen.io.vasp.outputs import Vasprun, VolumetricData
from pymatgen.util.due import Doi, due

if TYPE_CHECKING:
    from typing import Any, ClassVar, Literal

    from numpy.typing import NDArray

    from pymatgen.core.structure import IStructure
    from pymatgen.electronic_structure.cohp import IcohpCollection
    from pymatgen.util.typing import PathLike

# Module metadata (authorship, version and contact information).
__author__ = "Janine George, Marco Esters"
__copyright__ = "Copyright 2017, The Materials Project"
__version__ = "0.2"
__maintainer__ = "Janine George "
__email__ = "janinegeorge.ulfen@gmail.com"
__date__ = "Dec 13, 2017"


# Record the literature reference for this module (same DOI as quoted in the
# module docstring) through the `due` helper imported from pymatgen.util.due.
due.cite(
    Doi("10.1002/cplu.202200123"),
    description="Automated Bonding Analysis with Crystal Orbital Hamilton Populations",
)


def _get_lines(filename) -> list[str]:
    """Return the content of a text file as a list of lines.

    The file is opened through ``zopen`` in text mode with UTF-8 encoding and
    read in one go; line terminators are stripped by ``str.splitlines``.

    Args:
        filename: Path of the file to read.

    Returns:
        list[str]: The lines of the file, without trailing newline characters.
    """
    with zopen(filename, mode="rt", encoding="utf-8") as stream:
        content = stream.read()
    return cast("list[str]", content.splitlines())


class Cohpcar:
    """Parser for COXXCAR.lobster/COXXCAR.LCFO.lobster files generated by LOBSTER.

    Attributes:
        cohp_data (dict[str, Dict[str, Any]]): The COHP data of the form:
            {bond: {"COHP": {Spin.up: cohps, Spin.down:cohps},
                    "ICOHP": {Spin.up: icohps, Spin.down: icohps},
                    "length": bond length,
                    "sites": sites corresponding to the bond}
            Also contains an entry for the average, which does not have a "length" key.
        efermi (float): The Fermi level in eV.
        energies (Sequence[float]): Sequence of energies in eV. Note that LOBSTER
            shifts the energies so that the Fermi level is at zero.
        is_spin_polarized (bool): True if the calculation is spin polarized.
        orb_res_cohp (dict[str, Dict[str, Dict[str, Any]]]): The orbital-resolved COHPs of the form:
            orb_res_cohp[label] = {bond_data["orb_label"]: {
                "COHP": {Spin.up: cohps, Spin.down:cohps},
                "ICOHP": {Spin.up: icohps, Spin.down: icohps},
                "orbitals": orbitals,
                "length": bond lengths,
                "sites": sites corresponding to the bond},
            }
            None if the file holds no orbital-resolved entries.
    """

    def __init__(
        self,
        are_coops: bool = False,
        are_cobis: bool = False,
        are_multi_center_cobis: bool = False,
        is_lcfo: bool = False,
        filename: PathLike | None = None,
    ) -> None:
        """
        Args:
            are_coops (bool): Whether the file includes COOPs (True) or COHPs (False).
                Default is False.
            are_cobis (bool): Whether the file is COBIs (True) or COHPs (False).
                Default is False.
            are_multi_center_cobis (bool): Whether the file include multi-center COBIs (True)
                or two-center COBIs (False). Default is False.
            is_lcfo (bool): Whether the COXXCAR file is from LCFO analysis.
            filename (PathLike): The COHPCAR file. If it is None, the default
                file name will be chosen, depending on the value of are_coops,
                are_cobis and are_multi_center_cobis.

        Raises:
            ValueError: If more than one of are_coops, are_cobis and
                are_multi_center_cobis is set.
        """
        # The three population kinds are mutually exclusive.
        if sum(bool(flag) for flag in (are_coops, are_cobis, are_multi_center_cobis)) > 1:
            raise ValueError("You cannot have info about COOPs, COBIs and/or multi-center COBIs in the same file.")

        self.are_coops = are_coops
        self.are_cobis = are_cobis
        self.are_multi_center_cobis = are_multi_center_cobis
        self.is_lcfo = is_lcfo

        # Fall back to LOBSTER's default file name for the requested quantity.
        if filename is None:
            if are_coops:
                filename = "COOPCAR.lobster"
            elif are_cobis or are_multi_center_cobis:
                filename = "COBICAR.lobster"
            else:
                filename = "COHPCAR.lobster"
        self._filename = filename

        file_lines: list[str] = _get_lines(self._filename)
        multi_center = self.are_multi_center_cobis

        # Second line of the file: number of interactions, number of spin
        # channels, ..., Fermi level (last field).
        header_fields = file_lines[1].split()
        # For two-center files the first listed interaction is the average,
        # which is stored separately and therefore not counted as a bond.
        num_bonds = int(header_fields[0]) if multi_center else int(header_fields[0]) - 1
        self.efermi = float(header_fields[-1])
        self.is_spin_polarized = int(header_fields[1]) == 2
        spins = [Spin.up, Spin.down] if self.is_spin_polarized else [Spin.up]

        # Numeric table starting at line num_bonds + 3, transposed so that
        # columns[i] is the i-th column of the file.
        columns = np.array(
            [np.array(row.split(), dtype=float) for row in file_lines[num_bonds + 3 :]]
        ).transpose()

        cohp_data: dict[str, dict[str, Any]] = {}
        if multi_center:
            # No average entry: labels start at line 2, data at column 1, and
            # each spin channel spans num_bonds (COBI, ICOBI) column pairs.
            label_start, first_col, spin_stride = 2, 1, num_bonds
        else:
            # Average occupies label line 2 and columns 1/2; bonds follow.
            label_start, first_col, spin_stride = 3, 3, num_bonds + 1
            cohp_data["average"] = {
                "COHP": {spin: columns[1 + 2 * s * spin_stride] for s, spin in enumerate(spins)},
                "ICOHP": {spin: columns[2 + 2 * s * spin_stride] for s, spin in enumerate(spins)},
            }

        self.energies = columns[0]

        orb_cohp: dict[str, Any] = {}
        # Set when orbital-resolved entries appear before any total entry
        # (layout of LOBSTER versions older than 2.2.0).
        legacy_layout = False

        # Bonds are relabelled with a running counter so the labels are
        # consistent with ICOHPLIST.lobster.
        bond_num = 0
        bond_data: dict[str, Any] = {}
        for bond in range(num_bonds):
            bond_data = self._get_bond_data(
                file_lines[label_start + bond],
                is_lcfo=self.is_lcfo,
                are_multi_center_cobis=multi_center,
            )
            orbs = bond_data["orbitals"]

            value_col = 2 * bond + first_col
            cohp = {spin: columns[value_col + 2 * s * spin_stride] for s, spin in enumerate(spins)}
            icohp = {spin: columns[value_col + 2 * s * spin_stride + 1] for s, spin in enumerate(spins)}

            if orbs is None:
                # Total (not orbital-resolved) interaction: open a new label.
                bond_num += 1
                cohp_data[str(bond_num)] = {
                    "COHP": cohp,
                    "ICOHP": icohp,
                    "length": bond_data["length"],
                    "sites": bond_data["sites"],
                    "cells": bond_data["cells"] if multi_center else None,
                }
                continue

            orb_entry: dict[str, Any] = {
                "COHP": cohp,
                "ICOHP": icohp,
                "orbitals": orbs,
                "length": bond_data["length"],
                "sites": bond_data["sites"],
            }
            # Only two-center orbital entries carry a "cells" key.
            if not multi_center:
                orb_entry["cells"] = bond_data["cells"]

            label = str(bond_num)
            if label not in orb_cohp:
                # Present for LOBSTER versions older than 2.2.0
                if bond_num == 0:
                    legacy_layout = True
                if legacy_layout:
                    bond_num += 1
                    label = str(bond_num)
                orb_cohp[label] = {}
            orb_cohp[label][bond_data["orb_label"]] = orb_entry

        # Present for LOBSTER older than 2.2.0: create placeholder total
        # entries using the bond data of the last parsed line.
        if legacy_layout:
            for bond_str in orb_cohp:
                cohp_data[bond_str] = {
                    "COHP": None,
                    "ICOHP": None,
                    "length": bond_data["length"],
                    "sites": bond_data["sites"],
                }

        self.orb_res_cohp = orb_cohp or None
        self.cohp_data = cohp_data

    @staticmethod
    def _get_bond_data(line: str, is_lcfo: bool, are_multi_center_cobis: bool = False) -> dict[str, Any]:
        """Extract bond label, site indices, and length from
        a LOBSTER header line. The site indices are zero-based, so they
        can be easily used with a Structure object.

        Example header line:
            No.4:Fe1->Fe9(2.4524893531900283)
        Example header line for orbital-resolved COHP:
            No.1:Fe1[3p_x]->Fe2[3d_x^2-y^2](2.456180552772262)

        Args:
            line: line in the COHPCAR header describing the bond.
            is_lcfo: indicates whether the COXXCAR file is from LCFO analysis.
            are_multi_center_cobis: indicates multi-center COBIs

        Returns:
            Dict with the keys "length" (None for multi-center COBIs), "sites"
                (tuple of zero-based site indices), "cells" (None unless
                multi-center COBIs), "orbitals" and "orb_label" (both None if
                the line is not orbital-resolved).
        """
        # Everything before the last "(" describes the sites; "->" and ":"
        # both act as separators and the first token is the "No.X" label.
        split_line = line.rsplit("(", 1)
        tokens = split_line[0].replace("->", ":").split(":")

        if are_multi_center_cobis:
            site_strs = tokens[1:]
            site_indices = tuple(int(re.split(r"\D+", site)[1]) - 1 for site in site_strs)
            # First bracket of every site holds the cell translation.
            cells = [
                [int(value) for value in re.split(r"\[(.*?)\]", site)[1].split(" ") if value != ""]
                for site in site_strs
            ]

            orbitals = orb_label = None
            # A second bracket marks an orbital-resolved entry.
            if site_strs[0].count("[") > 1:
                orbs = [re.findall(r"\]\[(.*)\]", site)[0] for site in site_strs]
                orb_label, orbitals = get_orb_from_str(orbs)

            return {
                "sites": site_indices,
                "cells": cells,
                "length": None,
                "orbitals": orbitals,
                "orb_label": orb_label,
            }

        # Two-center case: the trailing "(...)" holds the bond length.
        length = float(split_line[-1][:-1])
        site_strs = tokens[1:3]
        site_indices = tuple(int(re.split(r"\D+", site)[1]) - 1 for site in site_strs)
        # TODO: get cells here as well

        orbitals = None
        orb_label = None
        if "[" in site_strs[0]:
            if is_lcfo:
                orbs = [re.findall(r"\[(\d+[a-zA-Z]+\d*)", site)[0] for site in site_strs]
                orb_label = "-".join(orbs)
                orbitals = orbs
            else:
                orbs = [re.findall(r"\[(.*)\]", site)[0] for site in site_strs]
                orb_label, orbitals = get_orb_from_str(orbs)

        return {
            "length": length,
            "sites": site_indices,
            "cells": None,
            "orbitals": orbitals,
            "orb_label": orb_label,
        }


class Icohplist(MSONable):
    """Read ICOXXLIST/ICOXXLIST.LCFO.lobster files generated by LOBSTER.

    Attributes:
        are_coops (bool): Whether the file includes COOPs (True) or COHPs (False).
        are_cobis (bool): Whether the file includes COBIs (True) or COHPs (False).
        is_lcfo (bool): Whether the ICOXXLIST file is from LCFO analysis.
        is_spin_polarized (bool): Whether the calculation is spin polarized.
        orbitalwise (bool): Whether the file contains orbital-resolved entries.
        icohplist (dict[str, Dict[str, Union[float, int, Dict[Spin, float]]]]):
            Property. The listfile data of the form: {
                bond: {
                    "length": Bond length,
                    "number_of_bonds": Number of bonds,
                    "icohp": {Spin.up: ICOHP(Ef)_up, Spin.down: ...},
                    "translation": Cell translation of the second atom,
                    "orbitals": Orbital-resolved data (or None),
                    }
            }
        icohpcollection (IcohpCollection): Property. IcohpCollection Object.
    """

    def __init__(
        self,
        is_lcfo: bool = False,
        are_coops: bool = False,
        are_cobis: bool = False,
        filename: PathLike | None = None,
        is_spin_polarized: bool = False,
        orbitalwise: bool = False,
        icohpcollection: IcohpCollection | None = None,
    ) -> None:
        """
        Args:
            is_lcfo (bool): Whether the ICOHPLIST file is from LCFO analysis.
            are_coops (bool): Whether the file includes COOPs (True) or COHPs (False).
                Default is False.
            are_cobis (bool): Whether the file is COBIs (True) or COHPs (False).
                Default is False.
            filename (PathLike): The ICOHPLIST file. If it is None, the default
                file name will be chosen, depending on the value of are_coops
                and are_cobis.
            is_spin_polarized (bool): Whether the calculation is spin polarized.
                Overwritten by the value detected in the file when the file is parsed.
            orbitalwise (bool): Whether the calculation is orbitalwise.
                Overwritten by the value detected in the file when the file is parsed.
            icohpcollection (IcohpCollection): IcohpCollection Object. If given,
                no file is read.

        Raises:
            ValueError: If both are_coops and are_cobis are set, or if the
                number of columns does not match a supported LOBSTER version.
            RuntimeError: If the file contains no data.
        """
        # Avoid circular import
        from pymatgen.electronic_structure.cohp import IcohpCollection

        self._filename = filename
        self.is_lcfo = is_lcfo
        self.is_spin_polarized = is_spin_polarized
        self.orbitalwise = orbitalwise
        self._icohpcollection = icohpcollection
        if are_coops and are_cobis:
            raise ValueError("You cannot have info about COOPs and COBIs in the same file.")

        self.are_coops = are_coops
        self.are_cobis = are_cobis
        if self._filename is None:
            if are_coops:
                self._filename = "ICOOPLIST.lobster"
            elif are_cobis:
                self._filename = "ICOBILIST.lobster"
            else:
                self._filename = "ICOHPLIST.lobster"

        if self._icohpcollection is None:
            # Read everything up front so the file handle is released before parsing.
            all_lines: list[str] = _get_lines(self._filename)
            if not all_lines:
                raise RuntimeError("ICOHPLIST file contains no data.")

            # --- detect header length robustly ---
            # A data line starts with an integer label; anything else (including
            # a blank first line) is treated as a header line to skip.
            header_len = 0
            try:
                int(all_lines[0].split()[0])
            except (ValueError, IndexError):
                header_len += 1
            # An optional line mentioning "spin" may follow.
            if header_len < len(all_lines) and "spin" in all_lines[header_len].lower():
                header_len += 1
            lines = all_lines[header_len:]
            if not lines:
                raise RuntimeError("ICOHPLIST file contains no data.")

            # --- version by column count only ---
            ncol = len(lines[0].split())
            if ncol == 6:
                version = "2.2.1"
                warnings.warn(
                    "Please consider using a newer LOBSTER version. See www.cohp.de.",
                    stacklevel=2,
                )
            elif ncol == 8:
                version = "3.1.1"
            elif ncol == 9:
                version = "5.1.0"
            else:
                raise ValueError(f"Unsupported LOBSTER version ({ncol} columns).")

            # If the calculation is spin polarized, the line in the middle
            # of the file will be another header line.
            # TODO: adapt this for orbital-wise stuff
            if version in {"3.1.1", "2.2.1"}:
                self.is_spin_polarized = "distance" in lines[len(lines) // 2]
            else:  # if version == "5.1.0":
                # Always True here because "5.1.0" is only selected for nine
                # columns (both spin channels on one line).
                self.is_spin_polarized = ncol == 9

            # Check if is orbital-wise ICOHPLIST
            # TODO: include case where there is only one ICOHP
            if not self.is_lcfo:  # data consists of atomic orbital interactions
                self.orbitalwise = len(lines) > 2 and "_" in lines[1].split()[1]
            else:  # data consists of molecule or fragment orbital interactions
                self.orbitalwise = len(lines) > 2 and lines[1].split()[1].count("_") >= 2

            # Separate total interactions from orbital-resolved ones, judged by
            # the underscores in the second column.
            data_orbitals: list[str] = []
            if self.orbitalwise:
                data_without_orbitals: list[str] = []
                for line in lines:
                    n_underscores = line.split()[1].count("_")
                    # In 5.1.0 LCFO files a single underscore is part of the
                    # fragment name and does not mark an orbital.
                    is_total = n_underscores == 0 or (
                        n_underscores == 1 and version == "5.1.0" and self.is_lcfo
                    )
                    if is_total:
                        data_without_orbitals.append(line)
                    else:
                        data_orbitals.append(line)
            else:
                data_without_orbitals = lines

            if not data_without_orbitals:
                raise RuntimeError("ICOHPLIST file contains no data.")

            if "distance" in data_without_orbitals[len(data_without_orbitals) // 2]:
                # TODO: adapt this for orbital-wise stuff
                # Spin-down block follows a repeated header in the middle.
                n_bonds = len(data_without_orbitals) // 2
                if n_bonds == 0:
                    raise RuntimeError("ICOHPLIST file contains no data.")
            else:
                n_bonds = len(data_without_orbitals)

            labels: list[str] = []
            atom1_list: list[str] = []
            atom2_list: list[str] = []
            lens: list[float] = []
            translations: list[tuple[int, int, int]] = []
            nums: list[int] = []
            icohps: list[dict[Spin, float]] = []

            for bond in range(n_bonds):
                line_parts = data_without_orbitals[bond].split()
                icohp: dict[Spin, float] = {}

                label = line_parts[0]
                atom1 = line_parts[1]
                atom2 = line_parts[2]
                length = float(line_parts[3])

                if version == "5.1.0":
                    # Both spin channels are on the same line.
                    num = 1
                    translation = (
                        int(line_parts[4]),
                        int(line_parts[5]),
                        int(line_parts[6]),
                    )
                    icohp[Spin.up] = float(line_parts[7])
                    if self.is_spin_polarized:
                        icohp[Spin.down] = float(line_parts[8])
                elif version == "3.1.1":
                    num = 1
                    translation = (
                        int(line_parts[4]),
                        int(line_parts[5]),
                        int(line_parts[6]),
                    )
                    icohp[Spin.up] = float(line_parts[7])
                    if self.is_spin_polarized:
                        # Spin-down values sit n_bonds + 1 lines further down
                        # (the +1 skips the repeated header).
                        icohp[Spin.down] = float(data_without_orbitals[bond + n_bonds + 1].split()[7])

                else:  # if version == "2.2.1":
                    num = int(line_parts[5])
                    translation = (0, 0, 0)
                    icohp[Spin.up] = float(line_parts[4])
                    if self.is_spin_polarized:
                        icohp[Spin.down] = float(data_without_orbitals[bond + n_bonds + 1].split()[4])

                labels.append(label)
                atom1_list.append(atom1)
                atom2_list.append(atom2)
                lens.append(length)
                translations.append(translation)
                nums.append(num)
                icohps.append(icohp)

            list_orb_icohp: list[dict] | None = None
            if self.orbitalwise:
                list_orb_icohp = []
                # Before 5.1.0 the second half of the orbital lines is the
                # spin-down block.
                if version != "5.1.0":
                    n_orbs = len(data_orbitals) // 2 if self.is_spin_polarized else len(data_orbitals)
                else:
                    n_orbs = len(data_orbitals)

                for i_orb in range(n_orbs):
                    data_orb = data_orbitals[i_orb]
                    icohp = {}
                    line_parts = data_orb.split()
                    label = line_parts[0]
                    if not self.is_lcfo:  # data consists of atomic orbital interactions
                        orbs = re.findall(r"_(.*?)(?=\s)", data_orb)
                        orb_label, orbitals = get_orb_from_str(orbs)
                    else:  # data consists of molecule or fragment orbital interactions
                        orbs = re.findall(r"_(\d+[a-zA-Z]+\d*)", data_orb)
                        orb_label = "-".join(orbs)
                        orbitals = orbs
                    icohp[Spin.up] = float(line_parts[7])

                    if self.is_spin_polarized:
                        if version != "5.1.0":
                            icohp[Spin.down] = float(data_orbitals[n_orbs + i_orb].split()[7])
                        else:
                            icohp[Spin.down] = float(line_parts[8])

                    # One dict per bond label; orbital entries sharing a label
                    # are collected in the same dict.
                    if len(list_orb_icohp) < int(label):
                        list_orb_icohp.append({orb_label: {"icohp": icohp, "orbitals": orbitals}})
                    else:
                        list_orb_icohp[int(label) - 1][orb_label] = {
                            "icohp": icohp,
                            "orbitals": orbitals,
                        }

            self._icohpcollection = IcohpCollection(
                are_coops=are_coops,
                are_cobis=are_cobis,
                list_labels=labels,
                list_atom1=atom1_list,
                list_atom2=atom2_list,
                list_length=lens,
                list_translation=translations,  # type: ignore[arg-type]
                list_num=nums,
                list_icohp=icohps,
                is_spin_polarized=self.is_spin_polarized,
                list_orb_icohp=list_orb_icohp,
            )

    @property
    def icohplist(self) -> dict[Any, dict[str, Any]]:
        """The ICOHP list compatible with older version of this class.

        Returns:
            dict: One entry per label of the underlying IcohpCollection with the
                keys "length", "number_of_bonds", "icohp", "translation" and
                "orbitals".

        Raises:
            ValueError: If no IcohpCollection is available.
        """
        if self._icohpcollection is None:
            raise ValueError(f"{self._icohpcollection=}")

        icohp_dict = {
            key: {
                "length": value._length,
                "number_of_bonds": value._num,
                "icohp": value._icohp,
                "translation": value._translation,
                "orbitals": value._orbitals,
            }
            for key, value in self._icohpcollection._icohplist.items()
        }

        # for LCFO only files drop the single orbital resolved entry when not in orbitalwise mode
        if self.is_lcfo and not self.orbitalwise:
            icohp_dict = {k: d for k, d in icohp_dict.items() if d.get("orbitals") is None}
        return icohp_dict

    @property
    def icohpcollection(self) -> IcohpCollection | None:
        """The IcohpCollection object."""
        return self._icohpcollection


class NciCobiList:
    """Read NcICOBILIST (multi-center ICOBI) files generated by LOBSTER.

    Attributes:
        is_spin_polarized (bool): Whether the calculation is spin polarized.
        orbital_wise (bool): Whether orbitalwise entries were found in the file
            (they are skipped, not parsed).
        list_labels (list): First column of every parsed interaction.
        list_n_atoms (list): Second column (number of atoms) of every parsed interaction.
        list_ncicobi (list): {Spin: value} dict for every parsed interaction.
        list_interaction_type (list): Interaction description for every parsed interaction.
        list_num (list): Always 1 for every parsed interaction.
        ncicobi_list (dict): Property. The listfile data of the form:
            {
                bond: {
                    "number_of_atoms": Number of atoms involved in the multi-center interaction,
                    "ncicobi": {Spin.up: Nc-ICOBI(Ef)_up, Spin.down: ...},
                    "interaction_type": Type of the multi-center interaction,
                    }
            }
    """

    def __init__(self, filename: PathLike = "NcICOBILIST.lobster") -> None:
        """

        LOBSTER < 4.1.0: no COBI/ICOBI/NcICOBI

        Args:
            filename: Name of the NcICOBILIST file.

        Raises:
            RuntimeError: If the file contains no data lines.
        """
        # The first line is a header and is not needed.
        lines = _get_lines(filename)[1:]
        if not lines:
            raise RuntimeError("NcICOBILIST file contains no data.")

        # If the calculation is spin-polarized, the line in the middle
        # of the file will be another header line.
        # TODO: adapt this for orbitalwise case
        self.is_spin_polarized = "spin" in lines[len(lines) // 2]

        # Orbitalwise and non-orbitalwise NcICOBIs can be mixed, so a single
        # orbitalwise entry ("s]" in the interaction columns) is enough.
        self.orbital_wise = len(lines) > 2 and any("s]" in str(row.split()[3:]) for row in lines)

        if self.orbital_wise:
            warnings.warn(
                "This is an orbitalwise NcICOBILIST.lobster file. "
                "Currently, the orbitalwise information is not read!",
                stacklevel=2,
            )
            # Keep only the rows without orbital markers.
            data_without_orbitals = []
            for row in lines:
                interaction_cols = str(row.split()[3:])
                if "_" in interaction_cols or "s]" in interaction_cols:
                    continue
                data_without_orbitals.append(row)
        else:
            data_without_orbitals = lines

        middle = len(data_without_orbitals) // 2
        if "spin" in data_without_orbitals[middle]:
            # TODO: adapt this for orbitalwise case
            # Second half of the rows belongs to the other spin channel.
            n_bonds = middle
            if n_bonds == 0:
                raise RuntimeError("NcICOBILIST file contains no data.")
        else:
            n_bonds = len(data_without_orbitals)

        self.list_labels = []
        self.list_n_atoms = []
        self.list_ncicobi = []
        self.list_interaction_type = []
        self.list_num = []

        for bond, row in enumerate(data_without_orbitals[:n_bonds]):
            fields = row.split()

            label = fields[0]
            n_atoms = fields[1]
            ncicobi = {Spin.up: float(fields[2])}
            # Remaining columns are joined into a compact "[a,b,...]" string.
            interaction_type = str(fields[3:]).replace("'", "").replace(" ", "")

            if self.is_spin_polarized:
                # Spin-down value: same position after the mid-file header.
                ncicobi[Spin.down] = float(data_without_orbitals[bond + n_bonds + 1].split()[2])

            self.list_labels.append(label)
            self.list_n_atoms.append(n_atoms)
            self.list_ncicobi.append(ncicobi)
            self.list_interaction_type.append(interaction_type)
            self.list_num.append(1)

        # TODO: add functions to get orbital resolved NcICOBIs

    @property
    def ncicobi_list(self) -> dict[Any, dict[str, Any]]:
        """
        Returns:
            dict: ncicobilist, keyed by the one-based position of each
                interaction (as a string).
        """
        return {
            str(idx + 1): {
                "number_of_atoms": int(self.list_n_atoms[idx]),
                "ncicobi": self.list_ncicobi[idx],
                "interaction_type": self.list_interaction_type[idx],
            }
            for idx in range(len(self.list_labels))
        }


class Doscar:
    """Parse a LOBSTER DOSCAR file into total and projected densities of states.

    The file is read once on construction; all results are exposed through
    read-only properties.

    Attributes:
        completedos (LobsterCompleteDos): Complete DOS built from the structure,
            the total DOS and the per-site projected DOS.
        pdos (list): One mapping per projected data block of the file. Access as
            pdos[atomindex]['orbitalstring'][Spin.up/Spin.down] to get a NumPy array.
        tdos (Dos): Total density of states.
        energies (NDArray): First column of the total-DOS block, i.e. the energy grid.
        tdensities (dict): Total DOS per spin channel. Only Spin.up is present
            when the file is not spin polarized.
        itdensities (dict): Integrated total DOS per spin channel. Only Spin.up
            is present when the file is not spin polarized.
        is_spin_polarized (bool): True when the total-DOS block has five columns,
            False when it has three.
    """

    def __init__(
        self,
        doscar: PathLike = "DOSCAR.lobster",
        is_lcfo: bool = False,
        structure_file: PathLike | None = "POSCAR",
        structure: IStructure | Structure | None = None,
    ) -> None:
        """
        Args:
            doscar (PathLike): The DOSCAR file, typically "DOSCAR.lobster".
            is_lcfo (bool): Whether the DOSCAR file is from LCFO analysis.
            structure_file (PathLike): Structure file to read, typically "POSCAR".
                Takes precedence over `structure` unless it is None.
            structure (Structure): Structure used only when `structure_file` is None.
        """
        self._doscar = doscar
        self._is_lcfo = is_lcfo

        # The structure file wins whenever one is given; the Structure object
        # is only a fallback for structure_file=None.
        if structure_file is None:
            self._final_structure = structure
        else:
            self._final_structure = Structure.from_file(structure_file)

        self._parse_doscar()

    def _parse_doscar(self):
        """Read the DOSCAR file and populate all private result attributes.

        Raises:
            ValueError: If the total-DOS block has neither three nor five columns.
        """
        total_densities = {}
        integrated_densities = {}
        blocks = []
        orbital_labels = []

        with zopen(self._doscar, mode="rt", encoding="utf-8") as handle:
            handle.readline()  # The first line is not used
            # Of the next four lines only the last one is needed
            for _ in range(3):
                handle.readline()
            efermi = float(handle.readline().split()[17])

            # Every data block starts with a header line and ends before the
            # next header; a blank line (or end of file) terminates the scan.
            header = handle.readline()
            while header.strip():
                n_points = int(header.split()[2])
                # Orbital names are listed after the last ";" of the header
                orbital_labels.append(header.split(";")[-1].split())

                # The first data row fixes the number of columns of the block
                first_row = handle.readline().split()
                block = np.zeros((n_points, len(first_row)))
                block[0] = np.array(first_row)
                for row_idx in range(1, n_points):
                    block[row_idx] = np.array(handle.readline().split())
                blocks.append(block)

                header = handle.readline()

        # The first block is the total DOS; its width reveals spin polarization
        total_block = np.array(blocks[0])
        n_columns = total_block.shape[1]
        if n_columns not in (3, 5):
            raise ValueError("There is something wrong with the DOSCAR. Can't extract spin polarization.")
        self._is_spin_polarized = n_columns == 5

        energies = total_block[:, 0]
        pdoss = []
        if self._is_spin_polarized:
            total_densities[Spin.up] = total_block[:, 1]
            total_densities[Spin.down] = total_block[:, 2]
            integrated_densities[Spin.up] = total_block[:, 3]
            integrated_densities[Spin.down] = total_block[:, 4]

            for block, labels in zip(blocks[1:], orbital_labels[1:]):
                site_pdos = defaultdict(dict)
                for col in range(1, block.shape[1]):
                    # Columns alternate up/down, two columns per orbital
                    channel = Spin.up if col % 2 else Spin.down
                    site_pdos[labels[(col - 1) // 2]][channel] = block[:, col]
                pdoss.append(site_pdos)
        else:
            total_densities[Spin.up] = total_block[:, 1]
            integrated_densities[Spin.up] = total_block[:, 2]

            for block, labels in zip(blocks[1:], orbital_labels[1:]):
                site_pdos = defaultdict(dict)
                for col in range(1, block.shape[1]):
                    # One column per orbital, stored under Spin.up
                    site_pdos[labels[col - 1]][Spin.up] = block[:, col]
                pdoss.append(site_pdos)

        self._efermi = efermi
        self._pdos = pdoss
        self._tdos = Dos(efermi, energies, total_densities)
        self._energies = energies
        self._tdensities = total_densities
        self._itdensities = integrated_densities

        # For DOSCAR.LCFO.lobster the projected blocks differ from the non-LCFO
        # DOSCAR, so for now every site gets an empty projection in that case.
        # TODO: handle LCFO pdos properly once the complete set of orbitals is available
        final_struct = self._final_structure
        if self._is_lcfo:
            pdoss_dict = {final_struct[idx]: {} for idx in range(len(pdoss))}
        else:
            pdoss_dict = {final_struct[idx]: site_pdos for idx, site_pdos in enumerate(pdoss)}

        self._completedos = LobsterCompleteDos(final_struct, self._tdos, pdoss_dict)

    @property
    def completedos(self) -> LobsterCompleteDos:
        """The LobsterCompleteDos assembled while parsing."""
        return self._completedos

    @property
    def pdos(self) -> list[dict]:
        """The projected DOS (PDOS), one mapping per projected block."""
        return self._pdos

    @property
    def tdos(self) -> Dos:
        """The total DOS (TDOS) object."""
        return self._tdos

    @property
    def energies(self) -> NDArray:
        """The energy grid of the DOS."""
        return self._energies

    @property
    def tdensities(self) -> dict[Spin, NDArray]:
        """The total DOS per spin channel as NumPy arrays."""
        return self._tdensities

    @property
    def itdensities(self) -> dict[Spin, NDArray]:
        """The integrated total DOS per spin channel as NumPy arrays."""
        return self._itdensities

    @property
    def is_spin_polarized(self) -> bool:
        """Whether the parsed DOSCAR is spin polarized."""
        return self._is_spin_polarized


class Charge(MSONable):
    """Read CHARGE.lobster/ CHARGE.LCFO.lobster files generated by LOBSTER.

    The object is either populated from keyword arguments or, when
    `num_atoms` is not given, parsed from the file.

    Attributes:
        atomlist (list[str]): Atom labels, built as species followed by the
            leading index column of each data line.
        is_lcfo (bool): Whether the CHARGE file is from LCFO analysis. Default is False.
        types (list[str]): Species of each entry in the CHARGE file.
        mulliken (list[float]): Mulliken charges. Left untouched by parsing
            when `is_lcfo` is True.
        loewdin (list[float]): Loewdin charges.
        num_atoms (int): Number of data lines (entries) in the CHARGE file.
    """

    def __init__(
        self,
        filename: PathLike = "CHARGE.lobster",
        is_lcfo: bool = False,
        num_atoms: int | None = None,
        atomlist: list[str] | None = None,
        types: list[str] | None = None,
        mulliken: list[float] | None = None,
        loewdin: list[float] | None = None,
    ) -> None:
        """
        Args:
            filename (PathLike): The CHARGE file, typically "CHARGE.lobster".
                Only read when `num_atoms` is None.
            is_lcfo (bool): Whether the CHARGE file is from LCFO analysis. Default is False.
            num_atoms (int): Number of atoms in the structure. Passing a value
                skips parsing of the file.
            atomlist (list[str]): Atoms in the structure.
            types (list[str]): Species of the atoms in the structure.
            mulliken (list[float]): Mulliken charges.
            loewdin (list[float]): Loewdin charges.

        Raises:
            RuntimeError: If the file has to be parsed but holds no data lines.
        """
        self._filename = filename
        self.is_lcfo = is_lcfo
        self.num_atoms = num_atoms
        self.types = [] if types is None else types
        self.atomlist = [] if atomlist is None else atomlist
        self.mulliken = [] if mulliken is None else mulliken
        self.loewdin = [] if loewdin is None else loewdin

        # Everything was supplied by the caller, nothing to parse
        if self.num_atoms is not None:
            return

        # Drop the three leading and the two trailing lines of the file
        rows = _get_lines(filename)[3:-2]
        if not rows:
            raise RuntimeError("CHARGES file contains no data.")

        self.num_atoms = len(rows)
        for row in rows:
            tokens = row.split()
            species = tokens[1]
            self.atomlist.append(species + tokens[0])
            self.types.append(species)
            if self.is_lcfo:
                # LCFO files carry a single charge column
                self.loewdin.append(float(tokens[2]))
            else:
                self.mulliken.append(float(tokens[2]))
                self.loewdin.append(float(tokens[3]))

    def get_structure_with_charges(self, structure_filename: PathLike) -> Structure:
        """Attach the Mulliken and Loewdin charges to a structure as site properties.

        Args:
            structure_filename (PathLike): The structure file, e.g. a POSCAR.

        Returns:
            Structure: Copy of the structure read from the file, carrying the
                site properties "Mulliken Charges" and "Loewdin Charges".

        Raises:
            ValueError: If the charges come from an LCFO analysis.
        """
        # The file is read before the LCFO check, as in any other call
        struct = Structure.from_file(structure_filename)
        if self.is_lcfo:
            raise ValueError(
                "CHARGE.LCFO.lobster charges are not sorted site wise. Thus, the site properties cannot be added.",
            )

        return struct.copy(
            site_properties={
                "Mulliken Charges": self.mulliken,
                "Loewdin Charges": self.loewdin,
            }
        )

    @property
    @deprecated(message="Use `mulliken` instead.", category=DeprecationWarning)
    def Mulliken(self) -> list[float]:
        """Deprecated alias of `mulliken`."""
        return self.mulliken

    @property
    @deprecated(message="Use `loewdin` instead.", category=DeprecationWarning)
    def Loewdin(self) -> list[float]:
        """Deprecated alias of `loewdin`."""
        return self.loewdin


class Lobsterout(MSONable):
    """Read the lobsterout file: spillings, basis, timing, warnings, infos and
    flags telling which output files were announced in the log.

    Attributes:
        filename (PathLike | None): The file name handed to the constructor.
        basis_functions (list[list[str]]): Basis functions per element line, as strings.
        basis_type (list[str]): Basis type per element line, parentheses removed.
        charge_spilling (list[float]): Charge spilling as fractions (first entry: spin 1,
            second entry: spin 2 if present).
        dft_program (str | None): The DFT program named in the log, None if not found.
        elements (list[str]): Elements listed in the basis-function section.
        has_charge (bool): False only if writing CHARGE.lobster was reported as skipped.
        has_cohpcar (bool): Whether writing COHPCAR.lobster was announced and not skipped.
        has_cohpcar_lcfo (bool): Whether writing COHPCAR.LCFO.lobster was announced.
        has_madelung (bool): Whether writing SitePotentials.lobster and
            MadelungEnergies.lobster was announced and not skipped.
        has_mofecar (bool): Whether writing MOFECAR.lobster and IMOFELIST.lobster was announced.
        has_coopcar (bool): Whether writing COOPCAR.lobster was announced and not skipped.
        has_coopcar_lcfo (bool): Whether writing COOPCAR.LCFO.lobster was announced.
        has_cobicar (bool): Whether writing COBICAR.lobster was announced and not skipped.
        has_cobicar_lcfo (bool): Whether writing COBICAR.LCFO.lobster was announced.
        has_doscar (bool): Whether writing DOSCAR.lobster was announced and not skipped.
        has_doscar_lso (bool): Whether writing DOSCAR.LSO.lobster was announced and not skipped.
        has_doscar_lcfo (bool): Whether writing DOSCAR.LCFO.lobster was announced.
        has_projection (bool): Whether saving projectionData.lobster was announced.
        has_bandoverlaps (bool): Whether the bandOverlaps.lobster warning is present.
        has_density_of_energies (bool): Whether writing DensityOfEnergy.lobster was announced.
        has_fatbands (bool): Whether a line with "FatBand" as second word is present.
        has_grosspopulation (bool): Whether writing CHARGE.lobster and GROSSPOP.lobster
            was announced.
        has_polarization (bool): Whether writing POLARIZATION.lobster was announced.
        info_lines (list[str]): All INFO lines, without the "INFO:" prefix.
        info_orthonormalization (list[str]): Lines mentioning "orthonormalized",
            without their first word.
        is_restart_from_projection (bool): Whether the calculation loaded an
            existing projection file.
        lobster_version (str): The LOBSTER version string.
        number_of_spins (int): The number of spins (1 or 2).
        number_of_threads (int): How many threads were used.
        timing (dict[str, dict[str, str]]): "wall_time", "user_time" and "sys_time",
            each a dict with the keys "h", "min", "s" and "ms".
        total_spilling (list[float]): Total spilling as fractions for spin channel 1
            (and spin channel 2).
        warning_lines (list[str]): All WARNING lines, without the "WARNING:" prefix.
    """

    # Attribute names that may be passed as keyword arguments to the constructor
    _ATTRIBUTES: ClassVar[set[str]] = {
        "filename",
        "is_restart_from_projection",
        "lobster_version",
        "number_of_threads",
        "dft_program",
        "number_of_spins",
        "charge_spilling",
        "total_spilling",
        "elements",
        "basis_type",
        "basis_functions",
        "timing",
        "warning_lines",
        "info_orthonormalization",
        "info_lines",
        "has_doscar",
        "has_doscar_lso",
        "has_doscar_lcfo",
        "has_cohpcar",
        "has_cohpcar_lcfo",
        "has_coopcar",
        "has_coopcar_lcfo",
        "has_cobicar",
        "has_cobicar_lcfo",
        "has_charge",
        "has_madelung",
        "has_mofecar",
        "has_projection",
        "has_bandoverlaps",
        "has_fatbands",
        "has_grosspopulation",
        "has_polarization",
        "has_density_of_energies",
    }

    # TODO: add tests for skipping COBI and Madelung
    # TODO: add tests for including COBI and Madelung
    def __init__(self, filename: PathLike | None, **kwargs) -> None:
        """
        Args:
            filename (PathLike): The lobsterout file. Only parsed when no
                keyword arguments are given.
            **kwargs: Attribute values used to initialize the instance directly;
                every key must be one of the valid Lobsterout attributes.

        Raises:
            ValueError: If a keyword is not a valid attribute, or if neither
                a filename nor keyword arguments are provided.
            RuntimeError: If the file contains no lines.
        """
        self.filename = filename

        # Keyword arguments take precedence: the file is not read at all
        if kwargs:
            for key, value in kwargs.items():
                if key not in self._ATTRIBUTES:
                    raise ValueError(f"{key}={value} is not a valid attribute for Lobsterout")
                setattr(self, key, value)
            return

        if not filename:
            raise ValueError("must provide either filename or kwargs to initialize Lobsterout")

        lines = _get_lines(filename)
        if not lines:
            raise RuntimeError("lobsterout does not contain any data")

        # Exact-line lookups below only need membership tests
        logged = set(lines)

        # Did LOBSTER start from an existing projection?
        self.is_restart_from_projection = "loading projection from projectionData.lobster..." in logged

        self.lobster_version = self._get_lobster_version(data=lines)
        self.number_of_threads = self._get_threads(data=lines)
        self.dft_program = self._get_dft_program(data=lines)

        self.number_of_spins = self._get_number_of_spins(data=lines)
        self.charge_spilling, self.total_spilling = self._get_spillings(
            data=lines, number_of_spins=self.number_of_spins
        )

        self.elements, self.basis_type, self.basis_functions = self._get_elements_basistype_basisfunctions(
            data=lines
        )

        self.timing = dict(zip(("wall_time", "user_time", "sys_time"), self._get_timing(data=lines)))

        self.warning_lines = self._get_all_warning_lines(data=lines)
        self.info_orthonormalization = self._get_warning_orthonormalization(data=lines)
        self.info_lines = self._get_all_info_lines(data=lines)

        self.has_doscar = "writing DOSCAR.lobster..." in logged and "SKIPPING writing DOSCAR.lobster..." not in logged
        self.has_doscar_lso = (
            "writing DOSCAR.LSO.lobster..." in logged and "SKIPPING writing DOSCAR.LSO.lobster..." not in logged
        )

        # Reduce the version string to "major.minor"; unparsable versions
        # are treated like the oldest format.
        major_minor = self.lobster_version.strip("v").split(".")[:2]
        try:
            version_number = float(".".join(major_minor))
        except ValueError:
            version_number = 0.0

        if version_number >= 5.1:
            # From 5.1 on the messages name only the *CAR file
            self.has_cohpcar = (
                "writing COHPCAR.lobster..." in logged and "SKIPPING writing COHPCAR.lobster..." not in logged
            )
            self.has_coopcar = (
                "writing COOPCAR.lobster..." in logged and "SKIPPING writing COOPCAR.lobster..." not in logged
            )
            cobi_announced = (
                "writing COBICAR.lobster..." in logged
                or "Writing COBICAR.lobster, ICOBILIST.lobster and NcICOBILIST.lobster..." in logged
            )
            self.has_cobicar = cobi_announced and "SKIPPING writing COBICAR.lobster..." not in logged
        else:
            # Older messages name the *CAR and the I*LIST file together
            self.has_cohpcar = (
                "writing COHPCAR.lobster and ICOHPLIST.lobster..." in logged
                and "SKIPPING writing COHPCAR.lobster and ICOHPLIST.lobster..." not in logged
            )
            self.has_coopcar = (
                "writing COOPCAR.lobster and ICOOPLIST.lobster..." in logged
                and "SKIPPING writing COOPCAR.lobster and ICOOPLIST.lobster..." not in logged
            )
            self.has_cobicar = (
                "writing COBICAR.lobster and ICOBILIST.lobster..." in logged
                and "SKIPPING writing COBICAR.lobster and ICOBILIST.lobster..." not in logged
            )

        self.has_cobicar_lcfo = "writing COBICAR.LCFO.lobster..." in logged
        self.has_cohpcar_lcfo = "writing COHPCAR.LCFO.lobster..." in logged
        self.has_coopcar_lcfo = "writing COOPCAR.LCFO.lobster..." in logged
        self.has_doscar_lcfo = "writing DOSCAR.LCFO.lobster..." in logged
        self.has_polarization = "writing polarization to POLARIZATION.lobster..." in logged
        # CHARGE.lobster counts as present unless it was explicitly skipped
        self.has_charge = "SKIPPING writing CHARGE.lobster..." not in logged
        self.has_projection = "saving projection to projectionData.lobster..." in logged
        self.has_bandoverlaps = (
            "WARNING: I dumped the band overlap matrices to the file bandOverlaps.lobster." in logged
        )
        self.has_fatbands = self._has_fatband(data=lines)
        self.has_grosspopulation = "writing CHARGE.lobster and GROSSPOP.lobster..." in logged
        self.has_density_of_energies = "writing DensityOfEnergy.lobster..." in logged
        self.has_madelung = (
            "writing SitePotentials.lobster and MadelungEnergies.lobster..." in logged
            and "skipping writing SitePotentials.lobster and MadelungEnergies.lobster..." not in logged
        )
        self.has_mofecar = "Writing MOFECAR.lobster and IMOFELIST.lobster..." in logged

    def get_doc(self) -> dict[str, Any]:
        """Get a dict with the information stored in lobsterout.

        Three entries use a key that differs from the attribute name
        ("restart_from_projection", "lobster_version" unchanged, "threads");
        all other keys equal the attribute names.
        """
        doc: dict[str, Any] = {
            "restart_from_projection": self.is_restart_from_projection,
            "lobster_version": self.lobster_version,
            "threads": self.number_of_threads,
        }
        same_named = (
            "dft_program",
            "charge_spilling",
            "total_spilling",
            "elements",
            "basis_type",
            "basis_functions",
            "timing",
            "warning_lines",
            "info_orthonormalization",
            "info_lines",
            "has_doscar",
            "has_doscar_lso",
            "has_doscar_lcfo",
            "has_cohpcar",
            "has_cohpcar_lcfo",
            "has_coopcar",
            "has_coopcar_lcfo",
            "has_cobicar",
            "has_cobicar_lcfo",
            "has_charge",
            "has_madelung",
            "has_mofecar",
            "has_projection",
            "has_bandoverlaps",
            "has_fatbands",
            "has_grosspopulation",
            "has_polarization",
            "has_density_of_energies",
        )
        for name in same_named:
            doc[name] = getattr(self, name)
        return doc

    def as_dict(self) -> dict[str, Any]:
        """MSONable dict: all instance attributes plus "@module" and "@class"."""
        return {
            **vars(self),
            "@module": type(self).__module__,
            "@class": type(self).__name__,
        }

    @staticmethod
    def _get_lobster_version(data: list[str]) -> str:
        """Get the LOBSTER version, i.e. the word following a leading "LOBSTER".

        Raises:
            RuntimeError: If no such line exists.
        """
        for raw in data:
            tokens = raw.split()
            if len(tokens) >= 2 and tokens[0] == "LOBSTER":
                return tokens[1]
        raise RuntimeError("Version not found.")

    @staticmethod
    def _has_fatband(data: list[str]) -> bool:
        """Check whether any line has "FatBand" as its second word."""
        return any(raw.split()[1:2] == ["FatBand"] for raw in data)

    @staticmethod
    def _get_dft_program(data: list[str]) -> str | None:
        """Get the DFT program: the word after a fourth word "program...", else None."""
        for raw in data:
            tokens = raw.split()
            if len(tokens) >= 5 and tokens[3] == "program...":
                return tokens[4]
        return None

    @staticmethod
    def _get_number_of_spins(data: list[str]) -> Literal[1, 2]:
        """Get the number of spin channels (2 if spin channel 2 is reported, else 1)."""
        if "spillings for spin channel 2" in data:
            return 2
        return 1

    @staticmethod
    def _get_threads(data: list[str]) -> int:
        """Get the number of CPU threads.

        Raises:
            ValueError: If no line reports the thread count.
        """
        for raw in data:
            tokens = raw.split()
            # The count directly precedes the twelfth word "thread(s)"
            if len(tokens) >= 12 and tokens[11] in {"threads", "thread"}:
                return int(tokens[10])
        raise ValueError("Threads not found.")

    @staticmethod
    def _get_spillings(
        data: list[str],
        number_of_spins: Literal[1, 2],
    ) -> tuple[list[float], list[float]]:
        """Get charge spillings and total spillings as fractions (percent / 100).

        Scanning stops as soon as both lists hold one value per spin channel.
        """
        collected: dict[str, list[float]] = {"charge": [], "total": []}
        for raw in data:
            tokens = raw.split()
            if len(tokens) > 2 and tokens[2] == "spilling:" and tokens[1] in collected:
                collected[tokens[1]].append(float(tokens[3].replace("%", "")) / 100.0)

            if len(collected["charge"]) == number_of_spins and len(collected["total"]) == number_of_spins:
                break

        return collected["charge"], collected["total"]

    @staticmethod
    def _get_elements_basistype_basisfunctions(
        data: list[str],
    ) -> tuple[list[str], list[str], list[list[str]]]:
        """Get elements, basis types and basis functions.

        The relevant lines follow the first line containing
        "setting up local basis functions..." and end at the first line
        whose leading word belongs to a different log section.
        """
        section_starters = {
            "INFO:",
            "WARNING:",
            "setting",
            "calculating",
            "post-processing",
            "saving",
            "spillings",
            "writing",
        }
        elements: list[str] = []
        basistypes: list[str] = []
        basisfunctions: list[list[str]] = []

        remaining = iter(data)
        # Consume everything up to and including the section header
        for raw in remaining:
            if "setting up local basis functions..." in raw:
                break

        for raw in remaining:
            tokens = raw.split()
            if tokens[0] in section_starters:
                break
            elements.append(tokens[0])
            basistypes.append(tokens[1].replace("(", "").replace(")", ""))
            # Everything after element and basis type is a basis function
            basisfunctions.append(tokens[2:])

        return elements, basistypes, basisfunctions

    @staticmethod
    def _get_timing(
        data: list[str],
    ) -> tuple[dict[str, str], dict[str, str], dict[str, str]]:
        """Get wall time, user time and system time.

        Only lines from the first one containing the word "finished" onwards
        are considered; the last matching line of each kind wins.
        """
        wall_fields: list[str] = []
        user_fields: list[str] = []
        sys_fields: list[str] = []

        summary_reached = False
        for raw in data:
            tokens = raw.split()
            if "finished" in tokens:
                summary_reached = True
            if not summary_reached:
                continue

            if "wall" in tokens:
                # Skip the two leading words in front of the numbers
                wall_fields = tokens[2:10]
            if "user" in tokens:
                user_fields = tokens[:8]
            if "sys" in tokens:
                sys_fields = tokens[:8]

        def as_units(fields: list[str]) -> dict[str, str]:
            # Values sit at the even positions, their unit names in between
            return {"h": fields[0], "min": fields[2], "s": fields[4], "ms": fields[6]}

        return as_units(wall_fields), as_units(user_fields), as_units(sys_fields)

    @staticmethod
    def _get_warning_orthonormalization(data: list[str]) -> list[str]:
        """Get lines containing the word "orthonormalized", without their first word."""
        split_lines = (raw.split() for raw in data)
        return [" ".join(tokens[1:]) for tokens in split_lines if "orthonormalized" in tokens]

    @staticmethod
    def _get_all_warning_lines(data: list[str]) -> list[str]:
        """Get all WARNING lines, without the "WARNING:" prefix."""
        split_lines = (raw.split() for raw in data)
        return [" ".join(tokens[1:]) for tokens in split_lines if tokens[:1] == ["WARNING:"]]

    @staticmethod
    def _get_all_info_lines(data: list[str]) -> list[str]:
        """Get all INFO lines, without the "INFO:" prefix."""
        split_lines = (raw.split() for raw in data)
        return [" ".join(tokens[1:]) for tokens in split_lines if tokens[:1] == ["INFO:"]]


class Fatband:
    """Read FATBAND_x_y.lobster files.

    Attributes:
        efermi (float): Fermi level, read from vasprun.xml or taken from the ``efermi`` argument.
        eigenvals (dict[Spin, list[list[float]]]): Eigenvalues stored as nested lists indexed as
            [band_index][kpoint_index]. The values are the energies from the first FATBAND file
            shifted by ``efermi``.
            The kpoints are ordered according to the order of the kpoints_array attribute.
            If the band structure is not spin polarized, we only store one data set under Spin.up.
        is_spinpolarized (bool): Whether this was a spin-polarized calculation.
        kpoints_array (list[NDArray]): List of kpoints as NumPy arrays, as read from the k-point
            lines of the first FATBAND file.
        label_dict (dict[str, NDArray]): Dictionary that links a label from the KPOINTS file to
            the corresponding kpoint of kpoints_array.
        lattice (Lattice): Reciprocal lattice of the provided structure.
        nbands (int): Number of bands used in the calculation (read from the FATBAND header line).
        number_kpts (int): Number of kpoints covered by the FATBAND files.
        p_eigenvals (dict[Spin, list[list[dict]]]): Orbital projections stored as nested lists
            indexed as [band_index][kpoint_index]. Each entry is a dict of the form
            {atom name taken from the file name: {orbital name as read from the FATBAND file: projection}}.
            If the band structure is not spin polarized, we only store one data set under Spin.up.
        structure (Structure): Structure object.
    """

    def __init__(
        self,
        filenames: PathLike | list[PathLike] = ".",
        kpoints_file: PathLike = "KPOINTS",
        vasprun_file: PathLike | None = "vasprun.xml",
        structure: Structure | IStructure | None = None,
        efermi: float | None = None,
    ) -> None:
        """
        Args:
            filenames (PathLike | list[PathLike]): Either a list of FATBAND file names, or the path
                to a folder from which all "FATBAND_*.lobster" files will be read.
            kpoints_file (PathLike): KPOINTS file for bandstructure calculation, typically "KPOINTS".
            vasprun_file (PathLike | None): Corresponding vasprun.xml file. Instead, the
                Fermi level from the DFT run can be provided. Then, this should be set to None.
            structure (Structure): Structure object. Required despite the None default.
            efermi (float): Fermi level in eV. Only used when vasprun_file is None.

        Raises:
            ValueError: If no structure is given, if neither vasprun_file nor efermi is given,
                if no FATBAND files are found, or if the set of FATBAND files is inconsistent
                (duplicate atom/orbital combinations or an unexpected number of files per orbital).
        """
        # Both warnings are emitted unconditionally on every instantiation.
        warnings.warn(
            "Make sure all relevant FATBAND files were generated and read in!",
            stacklevel=2,
        )
        warnings.warn(
            "Use Lobster 3.2.0 or newer for fatband calculations!",
            stacklevel=2,
        )

        if structure is None:
            raise ValueError("A structure object has to be provided")
        self.structure = structure
        if vasprun_file is None and efermi is None:
            raise ValueError("vasprun_file or efermi have to be provided")

        self.lattice = self.structure.lattice.reciprocal_lattice
        # A given vasprun file takes precedence over an explicitly passed efermi.
        if vasprun_file is not None:
            self.efermi = Vasprun(
                filename=vasprun_file,
                ionic_step_skip=None,
                ionic_step_offset=0,
                parse_dos=True,
                parse_eigen=False,
                parse_projected_eigen=False,
                parse_potcar_file=False,
                occu_tol=1e-8,
                exception_on_bad_xml=True,
            ).efermi
        else:
            self.efermi = efermi
        kpoints_object = Kpoints.from_file(kpoints_file)

        # atom_type = []
        atom_names = []
        orbital_names = []
        parameters = []

        # Anything that is not a list is interpreted as a directory to search for FATBAND files.
        if not isinstance(filenames, list) or filenames is None:
            if filenames is None:
                filenames = "."

            filenames_new = [
                os.path.join(filenames, name)
                for name in os.listdir(filenames)
                if fnmatch.fnmatch(name, "FATBAND_*.lobster")
            ]

            filenames = cast("list[PathLike]", filenames_new)

        if len(filenames) == 0:
            raise ValueError("No FATBAND files in folder or given")

        # First pass: collect the atom name (second "_"-separated field of the file name)
        # and the orbital name (fifth token of the header line) of every file.
        for fname in filenames:
            lines = _get_lines(fname)

            atom_names.append(os.path.split(fname)[1].split("_")[1].capitalize())
            parameters = lines[0].split()
            # atom_type.append(re.split(r"[0-9]+", parameters[3])[0].capitalize())
            orbital_names.append(parameters[4])

        # Get atomtype orbital dict
        atom_orbital_dict: dict[str, list[str]] = {}
        for idx, atom in enumerate(atom_names):
            if atom not in atom_orbital_dict:
                atom_orbital_dict[atom] = []
            atom_orbital_dict[atom].append(orbital_names[idx])

        # Test if there are the same orbitals twice or if two different
        # formats were used or if all necessary orbitals are there
        for items in atom_orbital_dict.values():
            if len(set(items)) != len(items):
                raise ValueError("The are two FATBAND files for the same atom and orbital. The program will stop.")
            # Group the orbitals by the part before the first "_" (e.g. "2p" for "2p_x").
            # Each group must consist of 1, 3, 5 or 7 files -- presumably one unsplit
            # orbital or the complete set of p/d/f components (TODO confirm).
            split = [item.split("_")[0] for item in items]
            for number in collections.Counter(split).values():
                if number not in {1, 3, 5, 7}:
                    raise ValueError(
                        "Make sure all relevant orbitals were generated and that no duplicates (2p and 2p_x) are "
                        "present"
                    )

        kpoints_array: list = []
        eigenvals: dict = {}
        p_eigenvals: dict = {}
        # Second pass: parse the band data. Eigenvalues and kpoints are taken from the first
        # file only; the projections are taken from every file.
        for ifilename, filename in enumerate(filenames):
            lines = _get_lines(filename)

            if ifilename == 0:
                # ``parameters`` still holds the header tokens of the LAST file of the first pass.
                self.nbands = int(parameters[6])
                # The third token of the first k-point line is presumably the index of the first
                # k-point contained in the FATBAND file (TODO confirm against the file format).
                self.number_kpts = kpoints_object.num_kpts - int(lines[1].split()[2]) + 1

            # Detect spin polarization from the number of lines following the header line.
            if len(lines[1:]) == self.nbands + 2:
                self.is_spinpolarized = False
            elif len(lines[1:]) == self.nbands * 2 + 2:
                self.is_spinpolarized = True
            else:
                # Otherwise count the k-point ("#") lines within the first nbands * 2 + 3 lines
                # after the header: exactly two of them means spin-polarized.
                linenumbers = []
                for iline, line in enumerate(lines[1 : self.nbands * 2 + 4]):
                    if line.split()[0] == "#":
                        linenumbers.append(iline)

                # Only the first file decides in this branch.
                if ifilename == 0:
                    self.is_spinpolarized = len(linenumbers) == 2

            if ifilename == 0:
                # Allocate the [band][kpoint] containers once; the placeholders are overwritten below.
                eigenvals = {}
                eigenvals[Spin.up] = [[defaultdict(float) for _ in range(self.number_kpts)] for _ in range(self.nbands)]
                if self.is_spinpolarized:
                    eigenvals[Spin.down] = [
                        [defaultdict(float) for _ in range(self.number_kpts)] for _ in range(self.nbands)
                    ]

                p_eigenvals = {}
                p_eigenvals[Spin.up] = [
                    [
                        {
                            str(elem): {str(orb): defaultdict(float) for orb in atom_orbital_dict[elem]}
                            for elem in atom_names
                        }
                        for _ in range(self.number_kpts)
                    ]
                    for _ in range(self.nbands)
                ]

                if self.is_spinpolarized:
                    p_eigenvals[Spin.down] = [
                        [
                            {
                                str(elem): {str(orb): defaultdict(float) for orb in atom_orbital_dict[elem]}
                                for elem in atom_names
                            }
                            for _ in range(self.number_kpts)
                        ]
                        for _ in range(self.nbands)
                    ]

            # ``linenumber`` counts the data lines since the last k-point line; ``iband`` is the
            # band index within the current spin block; ``idx_kpt`` is the current k-point index.
            idx_kpt = -1
            linenumber = iband = 0
            for line in lines[1:]:
                if line.split()[0] == "#":
                    # k-point line: tokens 4-6 hold the three coordinates.
                    KPOINT = np.array(
                        [
                            float(line.split()[4]),
                            float(line.split()[5]),
                            float(line.split()[6]),
                        ]
                    )
                    if ifilename == 0:
                        kpoints_array.append(KPOINT)

                    linenumber = iband = 0
                    idx_kpt += 1
                # After nbands data lines the spin-down block starts: restart the band counter.
                if linenumber == self.nbands:
                    iband = 0
                if line.split()[0] != "#":
                    # Data line: token 1 is the energy, token 2 the projection.
                    if linenumber < self.nbands:
                        # The stored eigenvalue is the file value shifted by the Fermi level.
                        if ifilename == 0 and self.efermi is not None:
                            eigenvals[Spin.up][iband][idx_kpt] = float(line.split()[1]) + self.efermi

                        p_eigenvals[Spin.up][iband][idx_kpt][atom_names[ifilename]][orbital_names[ifilename]] = float(
                            line.split()[2]
                        )
                    if linenumber >= self.nbands and self.is_spinpolarized:
                        if ifilename == 0 and self.efermi is not None:
                            eigenvals[Spin.down][iband][idx_kpt] = float(line.split()[1]) + self.efermi
                        p_eigenvals[Spin.down][iband][idx_kpt][atom_names[ifilename]][orbital_names[ifilename]] = float(
                            line.split()[2]
                        )

                    linenumber += 1
                    iband += 1

        self.kpoints_array = kpoints_array
        self.eigenvals = eigenvals
        self.p_eigenvals = p_eigenvals

        # Map the labels of the last ``number_kpts`` entries of the KPOINTS file onto the parsed
        # kpoints. A label that occurs several times keeps the last matching kpoint.
        label_dict = {}
        if kpoints_object.labels is not None:
            for idx, label in enumerate(kpoints_object.labels[-self.number_kpts :], start=0):
                if label is not None:
                    label_dict[label] = kpoints_array[idx]

        self.label_dict = label_dict

    def get_bandstructure(self) -> LobsterBandStructureSymmLine:
        """Get a LobsterBandStructureSymmLine object which can be plotted with a normal BSPlotter.

        Returns:
            LobsterBandStructureSymmLine: Built from the kpoints, eigenvalues, projections,
                labels, reciprocal lattice, Fermi level and structure stored on this object.
        """
        return LobsterBandStructureSymmLine(
            kpoints=self.kpoints_array,
            eigenvals=self.eigenvals,
            lattice=self.lattice,
            efermi=self.efermi,  # type: ignore[arg-type]
            labels_dict=self.label_dict,
            structure=self.structure,  # type: ignore[arg-type]
            projections=self.p_eigenvals,
        )


class Bandoverlaps(MSONable):
    """Read bandOverlaps.lobster files, which are not created during every LOBSTER run.

    Attributes:
        band_overlaps_dict (dict[Spin, dict[str, list]]): The band overlap data of the form
            {
                spin: {
                    "k_points": list of k-points (each a list of floats),
                    "max_deviations": list of the max deviations associated with each k-point,
                    "matrices": list of the overlap matrices (2D arrays) associated with each k-point,
                }
            }.
            A spin channel without any listed k-point has no entry.
        max_deviation (list[float]): The maximal deviation for each problematic kpoint,
            in file order and across both spin channels.
    """

    def __init__(
        self,
        filename: PathLike = "bandOverlaps.lobster",
        band_overlaps_dict: dict[Spin, dict] | None = None,
        max_deviation: list[float] | None = None,
    ) -> None:
        """
        Args:
            filename (PathLike): The "bandOverlaps.lobster" file. Only read when
                no (or an empty) band_overlaps_dict is given.
            band_overlaps_dict: The band overlap data of the form:
                {
                    spin: {
                        "k_points" : list of k-point array,
                        "max_deviations": list of max deviations associated with each k-point,
                        "matrices": list of the overlap matrices associated with each k-point,
                    }
                }.
            max_deviation (list[float]): The maximal deviations for each problematic k-point.
        """
        self._filename = filename
        self.band_overlaps_dict = {} if band_overlaps_dict is None else band_overlaps_dict
        self.max_deviation = [] if max_deviation is None else max_deviation

        if not self.band_overlaps_dict:
            lines = _get_lines(filename)

            # The spin channels are labelled either 0/1 or 1/2; the last token of
            # the first line tells which labelling this file uses.
            spin_numbers = [0, 1] if lines[0].split()[-1] == "0" else [1, 2]

            self._read(lines, spin_numbers)

    def _read(self, lines: list[str], spin_numbers: list[int]) -> None:
        """Read all lines of the file and fill band_overlaps_dict and max_deviation.

        Args:
            lines (list[str]): Lines of the file.
            spin_numbers (list[int]): Spin numbers depending on LOBSTER version
                (label of the spin-up channel first, spin-down second).
        """
        header = "Overlap Matrix (abs) of the orthonormalized projected bands for spin"
        spin_up_header = f"{header} {spin_numbers[0]}"
        spin_down_header = f"{header} {spin_numbers[1]}"

        spin: Spin = Spin.up
        kpoint_array: list = []
        overlaps: list = []
        # This has to be done like this because there can be different numbers
        # of problematic k-points per spin
        for line in lines:
            # A blank line carries no data; treating it as a matrix row would add an
            # empty row and prevent the current matrix from ever being completed.
            if not line.strip():
                continue

            if spin_up_header in line:
                spin = Spin.up

            elif spin_down_header in line:
                spin = Spin.down

            elif "k-point" in line:
                kpoint_array = [float(token) for token in line.split(" ") if token not in {"at", "k-point", ""}]

            elif "maxDeviation" in line:
                spin_data = self.band_overlaps_dict.setdefault(spin, {})
                spin_data.setdefault("k_points", [])
                spin_data.setdefault("max_deviations", [])
                spin_data.setdefault("matrices", [])

                maxdev = float(line.split(" ")[2])
                spin_data["max_deviations"].append(maxdev)
                spin_data["k_points"].append(kpoint_array)
                self.max_deviation.append(maxdev)
                # A maxDeviation line precedes the rows of a new matrix.
                overlaps = []

            else:
                row = [float(el) for el in line.split(" ") if el != ""]

                overlaps.append(row)
                # The matrices are square: once there are as many rows as columns,
                # the matrix is complete.
                if len(overlaps) == len(row):
                    self.band_overlaps_dict[spin]["matrices"].append(np.array(overlaps))

    def has_good_quality_maxDeviation(self, limit_maxDeviation: float = 0.1) -> bool:
        """Check if the maxDeviation from the ideal bandoverlap is smaller
        or equal to a limit.

        Args:
            limit_maxDeviation (float): Upper Limit of the maxDeviation.

        Returns:
            bool: Whether the ideal bandoverlap is smaller or equal to the limit.
                True if no deviations were recorded at all.
        """
        return all(deviation <= limit_maxDeviation for deviation in self.max_deviation)

    def has_good_quality_check_occupied_bands(
        self,
        number_occ_bands_spin_up: int,
        number_occ_bands_spin_down: int | None = None,
        spin_polarized: bool = False,
        limit_deviation: float = 0.1,
    ) -> bool:
        """Check if the deviation from the ideal bandoverlap of all occupied bands
        is smaller or equal to limit_deviation.

        Args:
            number_occ_bands_spin_up (int): Number of occupied bands of spin up.
            number_occ_bands_spin_down (int): Number of occupied bands of spin down.
            spin_polarized (bool): Whether this is a spin polarized calculation.
            limit_deviation (float): Upper limit of the maxDeviation.

        Returns:
            bool: True if the quality of the projection is good. A spin channel
                without any recorded overlap matrix counts as good.

        Raises:
            ValueError: If spin_polarized is True but number_occ_bands_spin_down is None.
        """
        if spin_polarized and number_occ_bands_spin_down is None:
            raise ValueError("number_occ_bands_spin_down has to be specified")

        for spin in (Spin.up, Spin.down) if spin_polarized else (Spin.up,):
            if spin is Spin.up:
                num_occ_bands = number_occ_bands_spin_up
            else:
                if number_occ_bands_spin_down is None:
                    raise ValueError("number_occ_bands_spin_down has to be specified")
                num_occ_bands = number_occ_bands_spin_down

            # Only problematic k-points are stored, and their number can differ per
            # spin, so a spin channel may be absent: then there is nothing to check.
            matrices = self.band_overlaps_dict.get(spin, {}).get("matrices", [])
            for overlap_matrix in matrices:
                sub_array = np.asarray(overlap_matrix)[:num_occ_bands, :num_occ_bands]

                if not np.allclose(sub_array, np.identity(num_occ_bands), atol=limit_deviation, rtol=0):
                    return False

        return True

    @property
    @deprecated(message="Use `band_overlaps_dict` instead.", category=DeprecationWarning)
    def bandoverlapsdict(self) -> dict:
        """Deprecated alias of `band_overlaps_dict`."""
        return self.band_overlaps_dict


class Grosspop(MSONable):
    """Read GROSSPOP.lobster/ GROSSPOP.LCFO.lobster files.

    Attributes:
        list_dict_grosspop (list[dict[str, Any]]): List of dictionaries
            including all information about the grosspopulations. Each dictionary contains the following keys:
            - 'element': The second column of the entry's first line (non-LCFO files). For LCFO files
                this column is stored under the key 'mol' instead.
            - 'Mulliken GP': A dictionary of Mulliken gross populations, where the keys are the orbital labels and the
                values are the corresponding gross populations, either as a float or, if the file has separate
                spin columns, as {Spin.up: float, Spin.down: float}. Not present for LCFO files.
            - 'Loewdin GP': A dictionary of Loewdin gross populations, where the keys are the orbital labels and the
                values are the corresponding gross populations, in the same format as for 'Mulliken GP'.
            The 0th entry of the list refers to the first atom in GROSSPOP.lobster and so on.
        is_lcfo (bool): Whether the file was parsed as LCFO format.
    """

    def __init__(
        self,
        filename: PathLike = "GROSSPOP.lobster",
        is_lcfo: bool = False,
        list_dict_grosspop: list[dict] | None = None,
    ) -> None:
        """
        Args:
            filename (PathLike): The "GROSSPOP.lobster" file. Only read when no (or an empty)
                list_dict_grosspop is given.
            is_lcfo (bool): Whether the GROSSPOP file is in LCFO format.
            list_dict_grosspop (list[dict]): All information about the gross populations.
        """
        self._filename = filename
        self.is_lcfo = is_lcfo
        self.list_dict_grosspop = [] if list_dict_grosspop is None else list_dict_grosspop
        if not self.list_dict_grosspop:
            lines = _get_lines(filename)

            # Read file to list of dict
            # ``small_dict`` collects the entry currently being parsed. A line starting with a
            # number opens a new entry; a line whose label contains "total" closes it.
            small_dict: dict[str, Any] = {}
            # The first three lines are skipped (presumably a header -- TODO confirm).
            for line in lines[3:]:
                cleanlines = [idx for idx in line.split(" ") if idx != ""]
                # NOTE: the order of the branches matters, several conditions overlap.
                if len(cleanlines) == 5 and cleanlines[0].isdigit() and not self.is_lcfo:
                    # New entry without spin columns: number, element, orbital, Mulliken, Loewdin.
                    small_dict = {
                        "Mulliken GP": {},
                        "Loewdin GP": {},
                        "element": cleanlines[1],
                    }
                    small_dict["Mulliken GP"][cleanlines[2]] = float(cleanlines[3])
                    small_dict["Loewdin GP"][cleanlines[2]] = float(cleanlines[4])
                elif len(cleanlines) == 4 and cleanlines[0].isdigit() and self.is_lcfo:
                    # New LCFO entry without spin columns: number, mol, orbital, Loewdin.
                    small_dict = {"Loewdin GP": {}, "mol": cleanlines[1]}
                    small_dict["Loewdin GP"][cleanlines[2]] = float(cleanlines[3])
                elif len(cleanlines) == 5 and cleanlines[0].isdigit() and self.is_lcfo:
                    # New LCFO entry with spin columns: number, mol, orbital, Loewdin up, Loewdin down.
                    small_dict = {"Loewdin GP": {}, "mol": cleanlines[1]}
                    small_dict["Loewdin GP"][cleanlines[2]] = {
                        Spin.up: float(cleanlines[3]),
                        Spin.down: float(cleanlines[4]),
                    }
                elif len(cleanlines) == 5 and not cleanlines[0].isdigit():
                    # Continuation line with spin columns:
                    # orbital, Mulliken up, Mulliken down, Loewdin up, Loewdin down.
                    small_dict["Mulliken GP"][cleanlines[0]] = {
                        Spin.up: float(cleanlines[1]),
                        Spin.down: float(cleanlines[2]),
                    }
                    small_dict["Loewdin GP"][cleanlines[0]] = {
                        Spin.up: float(cleanlines[3]),
                        Spin.down: float(cleanlines[4]),
                    }
                    if "total" in cleanlines[0]:
                        self.list_dict_grosspop.append(small_dict)
                elif len(cleanlines) == 7 and cleanlines[0].isdigit():
                    # New entry with spin columns:
                    # number, element, orbital, Mulliken up/down, Loewdin up/down.
                    small_dict = {
                        "Mulliken GP": {},
                        "Loewdin GP": {},
                        "element": cleanlines[1],
                    }
                    small_dict["Mulliken GP"][cleanlines[2]] = {
                        Spin.up: float(cleanlines[3]),
                        Spin.down: float(cleanlines[4]),
                    }
                    small_dict["Loewdin GP"][cleanlines[2]] = {
                        Spin.up: float(cleanlines[5]),
                        Spin.down: float(cleanlines[6]),
                    }

                elif len(cleanlines) > 0 and "spin" not in line and self.is_lcfo:
                    # Remaining LCFO continuation lines: orbital followed by one value (no spin
                    # columns) or by the up and down values. Lines containing "spin" are skipped.
                    if len(cleanlines) == 2:
                        small_dict["Loewdin GP"][cleanlines[0]] = float(cleanlines[1])
                    else:
                        small_dict["Loewdin GP"][cleanlines[0]] = {
                            Spin.up: float(cleanlines[1]),
                            Spin.down: float(cleanlines[2]),
                        }
                    if "total" in cleanlines[0]:
                        self.list_dict_grosspop.append(small_dict)
                elif len(cleanlines) > 0 and "spin" not in line:
                    # Remaining non-LCFO continuation lines without spin columns:
                    # orbital, Mulliken, Loewdin.
                    small_dict["Mulliken GP"][cleanlines[0]] = float(cleanlines[1])
                    small_dict["Loewdin GP"][cleanlines[0]] = float(cleanlines[2])
                    if "total" in cleanlines[0]:
                        self.list_dict_grosspop.append(small_dict)

    def get_structure_with_total_grosspop(self, structure_filename: PathLike) -> Structure:
        """Get a Structure with Mulliken and Loewdin total grosspopulations as site properties.

        Args:
            structure_filename (PathLike): The POSCAR file.

        Returns:
            Structure Object with Mulliken and Loewdin total grosspopulations as site properties
            ("Total Mulliken GP" and "Total Loewdin GP").

        Raises:
            ValueError: If the data was read in LCFO format.
        """
        struct = Structure.from_file(structure_filename)
        if not self.is_lcfo:
            # The "total" entry of every parsed dictionary, in file order.
            mulliken_gps: list[dict] = []
            loewdin_gps: list[dict] = []
            for grosspop in self.list_dict_grosspop:
                mulliken_gps.append(grosspop["Mulliken GP"]["total"])
                loewdin_gps.append(grosspop["Loewdin GP"]["total"])

            site_properties = {
                "Total Mulliken GP": mulliken_gps,
                "Total Loewdin GP": loewdin_gps,
            }
            return struct.copy(site_properties=site_properties)
        raise ValueError(
            "The GROSSPOP.LCFO.lobster data is not site wise. Thus, the site properties cannot be added.",
        )


class Wavefunction:
    """Parse a LOBSTER wave function file and expose it as VolumetricData objects.

    Attributes:
        grid (tuple[int, int, int]): Grid for the wave function [Nx+1, Ny+1, Nz+1].
        points (list[Tuple[float, float, float]]): Points.
        real (list[float]): Real parts of wave function.
        imaginary (list[float]): Imaginary parts of wave function.
        distance (list[float]): Distances to the first point in wave function file.
    """

    def __init__(self, filename: PathLike, structure: Structure) -> None:
        """
        Args:
            filename (PathLike): The wavecar file from LOBSTER.
            structure (Structure): The Structure object.
        """
        self.filename = filename
        self.structure = structure

        parsed = Wavefunction._parse_file(filename)
        self.grid, self.points, self.real, self.imaginary, self.distance = parsed

    @staticmethod
    def _parse_file(
        filename: PathLike,
    ) -> tuple[tuple[int, int, int], list[tuple[float, float, float]], list[float], list[float], list[float]]:
        """Read the grid and the tabulated wave function values from a file.

        Args:
            filename (PathLike): The file to parse.

        Returns:
            grid (tuple[int, int, int]): Grid for the wave function [Nx+1, Ny+1, Nz+1].
            points (list[Tuple[float, float, float]]): Points.
            real (list[float]): Real parts of wave function.
            imaginary (list[float]): Imaginary parts of wave function.
            distance (list[float]): Distances to the first point in wave function file.

        Raises:
            ValueError: If the number of data rows differs from the number of grid points.
        """
        all_lines = _get_lines(filename)

        # Tokens 7-9 of the first line hold the three grid dimensions.
        header = all_lines[0].split()
        grid: tuple[int, int, int] = (int(header[7]), int(header[8]), int(header[9]))

        points = []
        distances = []
        reals = []
        imaginaries = []
        for raw in all_lines[1:]:
            fields = raw.split()
            # Rows with fewer than six columns carry no data point.
            if len(fields) < 6:
                continue
            x, y, z, dist, re_part, im_part = map(float, fields[:6])
            points.append((x, y, z))
            distances.append(dist)
            reals.append(re_part)
            imaginaries.append(im_part)

        n_expected = grid[0] * grid[1] * grid[2]
        if len(reals) != n_expected or len(imaginaries) != n_expected:
            raise ValueError("Something went wrong while reading the file")

        return grid, points, reals, imaginaries, distances

    def set_volumetric_data(self, grid: tuple[int, int, int], structure: Structure) -> None:
        """Build the VolumetricData objects for the real part, imaginary part and density.

        Sets the attributes final_real, final_imaginary, final_density as well as
        volumetricdata_real, volumetricdata_imaginary and volumetricdata_density.

        Args:
            grid (tuple[int, int, int]): Grid on which wavefunction was calculated, e.g. (1, 2, 2).
            structure (Structure): The Structure object.

        Raises:
            ValueError: If a tabulated point does not match the expected grid position.
        """
        n_a, n_b, n_c = grid[0] - 1, grid[1] - 1, grid[2] - 1
        cell = structure.lattice.matrix
        vec_a, vec_b, vec_c = cell[0], cell[1], cell[2]

        kept_real = []
        kept_imaginary = []
        kept_density = []

        mesh = itertools.product(range(n_a + 1), range(n_b + 1), range(n_c + 1))
        for flat_idx, (ia, ib, ic) in enumerate(mesh):
            # Fractional position along each lattice vector.
            frac_a = ia / float(n_a)
            frac_b = ib / float(n_b)
            frac_c = ic / float(n_c)
            expected = (
                frac_a * vec_a[0] + frac_b * vec_b[0] + frac_c * vec_c[0],
                frac_a * vec_a[1] + frac_b * vec_b[1] + frac_c * vec_c[1],
                frac_a * vec_a[2] + frac_b * vec_b[2] + frac_c * vec_c[2],
            )

            # The last index along every axis is left out of the volumetric data.
            if ia == n_a or ib == n_b or ic == n_c:
                continue

            # NOTE(review): the point is only rejected when all three coordinates
            # disagree with the expected position -- confirm that this is intended.
            listed = self.points[flat_idx]
            if not any(np.isclose(listed[axis], expected[axis], 1e-3) for axis in range(3)):
                raise ValueError(
                    "The provided wavefunction from Lobster does not contain all relevant"
                    " points. "
                    "Please use a line similar to: printLCAORealSpaceWavefunction kpoint 1 "
                    "coordinates 0.0 0.0 0.0 coordinates 1.0 1.0 1.0 box bandlist 1 "
                )

            re_part = self.real[flat_idx]
            im_part = self.imaginary[flat_idx]
            kept_real.append(re_part)
            kept_imaginary.append(im_part)
            kept_density.append(re_part**2 + im_part**2)

        shape = [n_a, n_b, n_c]
        self.final_real = np.reshape(kept_real, shape)
        self.final_imaginary = np.reshape(kept_imaginary, shape)
        self.final_density = np.reshape(kept_density, shape)

        self.volumetricdata_real = VolumetricData(structure, {"total": self.final_real})
        self.volumetricdata_imaginary = VolumetricData(structure, {"total": self.final_imaginary})
        self.volumetricdata_density = VolumetricData(structure, {"total": self.final_density})

    def get_volumetricdata_real(self) -> VolumetricData:
        """Get the real part of the wave function, computing it on first use.

        Returns:
            VolumetricData
        """
        if hasattr(self, "volumetricdata_real"):
            return self.volumetricdata_real
        self.set_volumetric_data(self.grid, self.structure)
        return self.volumetricdata_real

    def get_volumetricdata_imaginary(self) -> VolumetricData:
        """Get the imaginary part of the wave function, computing it on first use.

        Returns:
            VolumetricData
        """
        if hasattr(self, "volumetricdata_imaginary"):
            return self.volumetricdata_imaginary
        self.set_volumetric_data(self.grid, self.structure)
        return self.volumetricdata_imaginary

    def get_volumetricdata_density(self) -> VolumetricData:
        """Get the density of the wave function, computing it on first use.

        Returns:
            VolumetricData
        """
        if hasattr(self, "volumetricdata_density"):
            return self.volumetricdata_density
        self.set_volumetric_data(self.grid, self.structure)
        return self.volumetricdata_density

    def write_file(
        self,
        filename: PathLike = "WAVECAR.vasp",
        part: Literal["real", "imaginary", "density"] = "real",
    ) -> None:
        """Write one part of the wave function to a file that VESTA can read.

        This will only work if the wavefunction from lobster is constructed with:
            "printLCAORealSpaceWavefunction kpoint 1 coordinates 0.0 0.0 0.0
            coordinates 1.0 1.0 1.0 box bandlist 1 2 3 4 5 6 "
            or similar (the whole unit cell has to be covered!).

        Args:
            filename (PathLike): The output file, e.g. "WAVECAR.vasp".
            part ("real" | "imaginary" | "density"): Part of the wavefunction to save.

        Raises:
            ValueError: If part is none of the three allowed values.
        """
        parts = ("real", "imaginary", "density")

        # Compute the volumetric data first if any of the three objects is missing.
        if not all(hasattr(self, f"volumetricdata_{name}") for name in parts):
            self.set_volumetric_data(self.grid, self.structure)

        if part not in parts:
            raise ValueError('part can be only "real" or "imaginary" or "density"')
        getattr(self, f"volumetricdata_{part}").write_file(filename)


# Madelung and site potential classes
class MadelungEnergies(MSONable):
    """Read MadelungEnergies.lobster files generated by LOBSTER.

    Attributes:
        madelungenergies_mulliken (float): The Madelung energy based on the Mulliken approach.
        madelungenergies_loewdin (float): The Madelung energy based on the Loewdin approach.
        ewald_splitting (float): The Ewald splitting parameter to compute SitePotentials.
    """

    def __init__(
        self,
        filename: PathLike = "MadelungEnergies.lobster",
        ewald_splitting: float | None = None,
        madelungenergies_mulliken: float | None = None,
        madelungenergies_loewdin: float | None = None,
    ) -> None:
        """
        Args:
            filename (PathLike): The "MadelungEnergies.lobster" file. Only read when
                ewald_splitting is None.
            ewald_splitting (float): The Ewald splitting parameter to compute SitePotentials.
            madelungenergies_mulliken (float): The Madelung energy based on the Mulliken approach.
            madelungenergies_loewdin (float): The Madelung energy based on the Loewdin approach.

        Raises:
            RuntimeError: If the file has to be read but its data line is missing or blank.
        """
        self._filename = filename
        self.ewald_splitting = ewald_splitting
        self.madelungenergies_loewdin = madelungenergies_loewdin
        self.madelungenergies_mulliken = madelungenergies_mulliken

        if self.ewald_splitting is None:
            lines = _get_lines(filename)
            # The three values are read from the sixth line of the file. Check that
            # this line exists and is not blank, so that an empty or truncated file
            # gives a clear error instead of an IndexError.
            if len(lines) < 6 or not lines[5].strip():
                raise RuntimeError("MadelungEnergies file contains no data.")

            # Columns: Ewald splitting, Mulliken energy, Loewdin energy.
            line_parts = lines[5].split()
            self.ewald_splitting = float(line_parts[0])
            self.madelungenergies_mulliken = float(line_parts[1])
            self.madelungenergies_loewdin = float(line_parts[2])

    @property
    @deprecated(message="Use `madelungenergies_loewdin` instead.", category=DeprecationWarning)
    def madelungenergies_Loewdin(self) -> float | None:
        """Deprecated alias of `madelungenergies_loewdin`."""
        return self.madelungenergies_loewdin

    @property
    @deprecated(message="Use `madelungenergies_mulliken` instead.", category=DeprecationWarning)
    def madelungenergies_Mulliken(self) -> float | None:
        """Deprecated alias of `madelungenergies_mulliken`."""
        return self.madelungenergies_mulliken


class SitePotential(MSONable):
    """Read SitePotentials.lobster files generated by LOBSTER.

    Attributes:
        atomlist (list[str]): Atoms in SitePotentials.lobster.
        types (list[str]): Types of atoms in SitePotentials.lobster.
        num_atoms (int): Number of atoms in SitePotentials.lobster.
        sitepotentials_mulliken (list[float]): Mulliken potentials of sites in SitePotentials.lobster.
        sitepotentials_loewdin (list[float]): Loewdin potentials of sites in SitePotentials.lobster.
        madelungenergies_mulliken (float): The Madelung energy based on the Mulliken approach.
        madelungenergies_loewdin (float): The Madelung energy based on the Loewdin approach.
        ewald_splitting (float): The Ewald Splitting parameter to compute SitePotentials.
    """

    def __init__(
        self,
        filename: PathLike = "SitePotentials.lobster",
        ewald_splitting: float | None = None,
        num_atoms: int | None = None,
        atomlist: list[str] | None = None,
        types: list[str] | None = None,
        sitepotentials_loewdin: list[float] | None = None,
        sitepotentials_mulliken: list[float] | None = None,
        madelungenergies_mulliken: float | None = None,
        madelungenergies_loewdin: float | None = None,
    ) -> None:
        """
        Args:
            filename (PathLike): The SitePotentials file, typically "SitePotentials.lobster".
                It is only read when `num_atoms` is None.
            ewald_splitting (float): Ewald splitting parameter used for computing Madelung energies.
            num_atoms (int): Number of atoms in the structure.
            atomlist (list[str]): Atoms in the structure.
            types (list[str]): Unique atom types in the structure.
            sitepotentials_loewdin (list[float]): Loewdin site potentials.
            sitepotentials_mulliken (list[float]): Mulliken site potentials.
            madelungenergies_mulliken (float): Madelung energy based on the Mulliken approach.
            madelungenergies_loewdin (float): Madelung energy based on the Loewdin approach.

        Raises:
            RuntimeError: If the file has to be read but contains no lines.
        """
        self._filename = filename
        # Falsy inputs (None, 0.0, empty list) fall back to an empty list placeholder.
        self.ewald_splitting: list | float = ewald_splitting or []
        self.num_atoms: int | None = num_atoms
        # Copy the sequences so the instance never aliases (or mutates) caller-owned lists.
        self.types: list[str] = list(types) if types else []
        self.atomlist: list[str] = list(atomlist) if atomlist else []
        self.sitepotentials_loewdin: list[float] = list(sitepotentials_loewdin) if sitepotentials_loewdin else []
        self.sitepotentials_mulliken: list[float] = list(sitepotentials_mulliken) if sitepotentials_mulliken else []
        self.madelungenergies_loewdin: list | float = madelungenergies_loewdin or []
        self.madelungenergies_mulliken: list | float = madelungenergies_mulliken or []

        if self.num_atoms is None:
            lines = _get_lines(filename)
            if not lines:
                raise RuntimeError("SitePotentials file contains no data.")

            # The Ewald splitting is the 10th whitespace-separated token of the first line.
            self.ewald_splitting = float(lines[0].split()[9])

            # Drop the five leading lines; of what remains, the last two lines are not atom rows.
            lines = lines[5:]
            self.num_atoms = len(lines) - 2

            # max(..., 0) keeps a too-short file from turning into a negative slice bound.
            atom_rows = [line.split() for line in lines[: max(self.num_atoms, 0)]]

            # Values parsed from the file replace any per-atom data passed to the
            # constructor rather than being appended to it.
            self.atomlist = [row[1] + row[0] for row in atom_rows]
            self.types = [row[1] for row in atom_rows]
            self.sitepotentials_mulliken = [float(row[2]) for row in atom_rows]
            self.sitepotentials_loewdin = [float(row[3]) for row in atom_rows]

            # The final line carries the Madelung energies in its 4th and 5th tokens.
            madelung_parts = lines[self.num_atoms + 1].split()
            self.madelungenergies_mulliken = float(madelung_parts[3])
            self.madelungenergies_loewdin = float(madelung_parts[4])

    def get_structure_with_site_potentials(self, structure_filename: PathLike) -> Structure:
        """Get a Structure with Mulliken and Loewdin site potentials as site properties.

        Args:
            structure_filename (PathLike): The structure file (e.g. a POSCAR) read
                with `Structure.from_file`.

        Returns:
            Structure: Copy of the structure with the site properties
                "Mulliken Site Potentials (eV)" and "Loewdin Site Potentials (eV)".
        """
        struct = Structure.from_file(structure_filename)
        site_properties = {
            "Mulliken Site Potentials (eV)": self.sitepotentials_mulliken,
            "Loewdin Site Potentials (eV)": self.sitepotentials_loewdin,
        }
        return struct.copy(site_properties=site_properties)

    @property
    @deprecated(message="Use `sitepotentials_mulliken` instead.", category=DeprecationWarning)
    def sitepotentials_Mulliken(self) -> list[float]:
        """Deprecated alias that returns `sitepotentials_mulliken`."""
        return self.sitepotentials_mulliken

    @property
    @deprecated(message="Use `sitepotentials_loewdin` instead.", category=DeprecationWarning)
    def sitepotentials_Loewdin(self) -> list[float]:
        """Deprecated alias that returns `sitepotentials_loewdin`."""
        return self.sitepotentials_loewdin

    @property
    @deprecated(message="Use `madelungenergies_mulliken` instead.", category=DeprecationWarning)
    def madelungenergies_Mulliken(self):
        """Deprecated alias that returns `madelungenergies_mulliken`."""
        return self.madelungenergies_mulliken

    @property
    @deprecated(message="Use `madelungenergies_loewdin` instead.", category=DeprecationWarning)
    def madelungenergies_Loewdin(self):
        """Deprecated alias that returns `madelungenergies_loewdin`."""
        return self.madelungenergies_loewdin


def get_orb_from_str(orbs: list[str]) -> tuple[str, list[tuple[int, Orbital]]]:
    """Convert orbital strings into (principal quantum number, Orbital) pairs.

    The first character of every entry is read as the principal quantum number;
    the remainder is looked up in the tuple of orbital labels below and its
    position is used to build the `Orbital`.

    Args:
        orbs (list[str]): Orbitals, e.g. ["2p_x", "3s"].

    Returns:
        tuple[str, list[tuple[int, Orbital]]]: Orbital label (the individual
            "<n><Orbital name>" parts joined by "-"), orbitals.
    """
    # TODO: also use for plotting of DOS
    known_labels = (
        "s",
        "p_y",
        "p_z",
        "p_x",
        "d_xy",
        "d_yz",
        "d_z^2",
        "d_xz",
        "d_x^2-y^2",
        "f_y(3x^2-y^2)",
        "f_xyz",
        "f_yz^2",
        "f_z^3",
        "f_xz^2",
        "f_z(x^2-y^2)",
        "f_x(x^2-3y^2)",
    )

    orbitals: list[tuple[int, Orbital]] = []
    for orb_str in orbs:
        principal = int(orb_str[0])
        orbitals.append((principal, Orbital(known_labels.index(orb_str[1:]))))

    orb_label = "-".join(f"{principal}{orbital.name}" for principal, orbital in orbitals)

    return orb_label, orbitals


class LobsterMatrices:
    """Read Matrices file generated by LOBSTER (e.g. hamiltonMatrices.lobster).

    Attributes:
        If filename == "hamiltonMatrices.lobster":
            onsite_energies (list[NDArray]): Real parts of onsite energies from the
                matrices each k-point.
            average_onsite_energies (dict): Average onsite elements energies for
                all k-points with keys as basis used in the LOBSTER computation
                (uses only real part of matrix).
            hamilton_matrices (dict[Spin, NDArray]): The complex Hamilton matrix at each
                k-point with k-point and spin as keys.

        If filename == "coefficientMatrices.lobster":
            onsite_coefficients (list[NDArray]): Real parts of onsite coefficients
                from the matrices each k-point.
            average_onsite_coefficient (dict): Average onsite elements coefficients
                for all k-points with keys as basis used in the LOBSTER computation
                (uses only real part of matrix).
            coefficient_matrices (dict[Spin, NDArray]): The coefficients matrix
                at each k-point with k-point and spin as keys.

        If filename == "transferMatrices.lobster":
            onsite_transfer (list[NDArray]): Real parts of onsite transfer
                coefficients from the matrices at each k-point.
            average_onsite_transfer (dict): Average onsite elements transfer
                coefficients for all k-points with keys as basis used in the
                LOBSTER computation (uses only real part of matrix).
            transfer_matrices (dict[Spin, NDArray]): The coefficients matrix at
                each k-point with k-point and spin as keys.

        If filename == "overlapMatrices.lobster":
            onsite_overlaps (list[NDArray]): Real parts of onsite overlaps
                from the matrices each k-point.
            average_onsite_overlaps (dict): Average onsite elements overlaps
                for all k-points with keys as basis used in the LOBSTER
                computation (uses only real part of matrix).
            overlap_matrices (dict[NDArray]): The overlap matrix at
                each k-point with k-point as keys.
    """

    def __init__(
        self,
        e_fermi: float | None = None,
        filename: PathLike = "hamiltonMatrices.lobster",
    ) -> None:
        """
        Args:
            e_fermi (float): Fermi level in eV for the structure only.
                Relevant if input file contains Hamilton matrices data.
            filename (PathLike): The hamiltonMatrices file, typically "hamiltonMatrices.lobster".
        """

        self._filename = str(filename)
        with zopen(self._filename, mode="rt", encoding="utf-8") as file:
            lines: list[str] = cast("list[str]", file.readlines())
        if len(lines) == 0:
            raise RuntimeError("Please check provided input file, it seems to be empty")

        pattern_coeff_hamil_trans = r"(\d+)\s+kpoint\s+(\d+)"  # regex pattern to extract spin and k-point number
        pattern_overlap = r"kpoint\s+(\d+)"  # regex pattern to extract k-point number

        if "hamilton" in self._filename:
            if e_fermi is None:
                raise ValueError("Please provide the fermi energy in eV ")
            (
                self.onsite_energies,
                self.average_onsite_energies,
                self.hamilton_matrices,
            ) = self._parse_matrix(file_data=lines, pattern=pattern_coeff_hamil_trans, e_fermi=e_fermi)

        elif "coefficient" in self._filename:
            (
                self.onsite_coefficients,
                self.average_onsite_coefficient,
                self.coefficient_matrices,
            ) = self._parse_matrix(file_data=lines, pattern=pattern_coeff_hamil_trans, e_fermi=0)

        elif "transfer" in self._filename:
            (
                self.onsite_transfer,
                self.average_onsite_transfer,
                self.transfer_matrices,
            ) = self._parse_matrix(file_data=lines, pattern=pattern_coeff_hamil_trans, e_fermi=0)

        elif "overlap" in self._filename:
            (
                self.onsite_overlaps,
                self.average_onsite_overlaps,
                self.overlap_matrices,
            ) = self._parse_matrix(file_data=lines, pattern=pattern_overlap, e_fermi=0)

    @staticmethod
    def _parse_matrix(
        file_data: list[str],
        pattern: str,
        e_fermi: float,
    ) -> tuple[list[np.ndarray], dict[Any, Any], dict[Any, Any]]:
        complex_matrices: dict = {}
        matrix_diagonal_values = []
        start_inxs_real = []
        end_inxs_real = []
        start_inxs_imag = []
        end_inxs_imag = []
        # Get indices of real and imaginary part of matrix for each k point
        for idx, line in enumerate(file_data):
            line = line.strip()
            if "Real parts" in line:
                start_inxs_real.append(idx + 1)
                if idx == 1:  # ignore the first occurrence as files start with real matrices
                    pass
                else:
                    end_inxs_imag.append(idx - 1)

                matches = re.search(pattern, file_data[idx - 1])
                if matches and len(matches.groups()) == 2:
                    complex_matrices[matches[2]] = {}

            if "Imag parts" in line:
                end_inxs_real.append(idx - 1)
                start_inxs_imag.append(idx + 1)

            # Explicitly add the last line as files end with imaginary matrix
            if idx == len(file_data) - 1:
                end_inxs_imag.append(len(file_data))

        # Extract matrix data and store diagonal elements
        matrix_real = []
        matrix_imag = []
        for start_inx_real, end_inx_real, start_inx_imag, end_inx_imag in zip(
            start_inxs_real, end_inxs_real, start_inxs_imag, end_inxs_imag, strict=True
        ):
            # Matrix with text headers
            matrix_real = file_data[start_inx_real:end_inx_real]
            matrix_imag = file_data[start_inx_imag:end_inx_imag]

            # Extract only numerical data and convert to NumPy arrays
            matrix_array_real = np.array([line.split()[1:] for line in matrix_real[1:]], dtype=float)
            matrix_array_imag = np.array([line.split()[1:] for line in matrix_imag[1:]], dtype=float)

            # Combine real and imaginary parts to create a complex matrix
            comp_matrix = matrix_array_real + 1j * matrix_array_imag

            matches = re.search(pattern, file_data[start_inx_real - 2])
            if matches and len(matches.groups()) == 2:
                spin = Spin.up if matches[1] == "1" else Spin.down
                k_point = matches[2]
                complex_matrices[k_point] |= {spin: comp_matrix}
            elif matches and len(matches.groups()) == 1:
                k_point = matches[1]
                complex_matrices |= {k_point: comp_matrix}
            matrix_diagonal_values.append(comp_matrix.real.diagonal() - e_fermi)

        # Extract elements basis functions as list
        elements_basis_functions = [
            line.split()[:1][0] for line in matrix_real if line.split()[:1][0] != "basisfunction"
        ]

        # Get average row-wise
        average_matrix_diagonal_values = np.array(matrix_diagonal_values, dtype=float).mean(axis=0)

        # Get a dict with basis functions as keys and average values as values
        average_average_matrix_diag_dict = dict(
            zip(elements_basis_functions, average_matrix_diagonal_values, strict=True)
        )

        return (
            matrix_diagonal_values,
            average_average_matrix_diag_dict,
            complex_matrices,
        )


class Polarization(MSONable):
    """Read POLARIZATION.lobster file generated by LOBSTER.

    Attributes:
        rel_mulliken_pol_vector (dict[str, float]): Relative Mulliken polarization vector.
        rel_loewdin_pol_vector (dict[str, float]): Relative Loewdin polarization vector.
    """

    def __init__(
        self,
        filename: PathLike = "POLARIZATION.lobster",
        rel_mulliken_pol_vector: dict[str, float | str] | None = None,
        rel_loewdin_pol_vector: dict[str, float | str] | None = None,
    ) -> None:
        """
        Args:
            filename (PathLike): The "POLARIZATION.lobster" file. It is only read
                when both polarization vectors are empty or not given.
            rel_mulliken_pol_vector (dict[str, Union[float, str]]): Relative Mulliken polarization vector.
            rel_loewdin_pol_vector (dict[str, Union[float, str]]): Relative Loewdin polarization vector.

        Raises:
            RuntimeError: If the file has to be read but contains no lines.
        """
        self._filename = filename
        self.rel_mulliken_pol_vector = rel_mulliken_pol_vector if rel_mulliken_pol_vector is not None else {}
        self.rel_loewdin_pol_vector = rel_loewdin_pol_vector if rel_loewdin_pol_vector is not None else {}

        # Nothing to parse when at least one vector was supplied by the caller
        if self.rel_loewdin_pol_vector or self.rel_mulliken_pol_vector:
            return

        lines = _get_lines(filename)
        if not lines:
            raise RuntimeError("Polarization file contains no data.")

        # The first four lines are skipped; the rest is split on single spaces
        for line in lines[4:]:
            tokens = [token for token in line.split(" ") if token]

            if len(tokens) == 3:
                # "<label> <mulliken> <loewdin>": one shared key, numeric values
                label = tokens[0]
                self.rel_mulliken_pol_vector[label] = float(tokens[1])
                self.rel_loewdin_pol_vector[label] = float(tokens[2])
            elif len(tokens) == 4:
                # "<key>: <value> <key>: <value>": values stay strings, with the
                # Greek letter mu replaced by a plain "u"
                self.rel_mulliken_pol_vector[tokens[0].replace(":", "")] = tokens[1].replace("\u03bc", "u")
                self.rel_loewdin_pol_vector[tokens[2].replace(":", "")] = tokens[3].replace("\u03bc", "u")


class Bwdf(MSONable):
    """Read BWDF.lobster/BWDFCOHP.lobster file generated by LOBSTER.

    Attributes:
        centers (NDArray): Bond length centers for the distribution.
        bwdf (dict[Spin, NDArray]): Bond weighted distribution function.
        bin_width (float): Bin width used for computing the distribution by LOBSTER.
    """

    def __init__(
        self,
        filename: PathLike = "BWDF.lobster",
        centers: NDArray | None = None,
        bwdf: dict[Spin, NDArray] | None = None,
        bin_width: float | None = None,
    ) -> None:
        """
        Args:
            filename (PathLike): The "BWDF.lobster" file. Can also read BWDFCOHP.lobster.
                It is only read when `bwdf` is empty or not given.
            centers (NDArray): Bond length centers for the distribution.
            bwdf (dict[Spin, NDArray]): Bond weighted distribution function.
            bin_width (float): Bin width used for computing the distribution by LOBSTER.

        Raises:
            RuntimeError: If the file has to be read but contains no lines.
        """
        self._filename = filename
        self.centers = np.array([]) if centers is None else centers
        self.bwdf = {} if bwdf is None else bwdf
        self.bin_width = 0.0 if bin_width is None else bin_width

        if not self.bwdf:
            lines = _get_lines(filename)
            if not lines:
                raise RuntimeError("BWDF file contains no data.")

            # Collect in plain lists and build each array once; growing an
            # array with np.append per line copies it on every iteration.
            parsed_centers: list[float] = []
            up_values: list[float] = []
            down_values: list[float] = []
            for line in lines[1:]:  # the first line is skipped
                columns = line.split()
                if not columns:  # tolerate blank lines
                    continue
                parsed_centers.append(float(columns[0]))
                up_values.append(float(columns[1]))
                # A third column is read as the spin-down channel
                if len(columns) == 3:
                    down_values.append(float(columns[2]))

            # Centers read from the file replace any that were passed in, so
            # they always line up with the parsed distribution.
            self.centers = np.array(parsed_centers, dtype=float)
            self.bwdf[Spin.up] = np.array(up_values, dtype=float)
            if down_values:  # no down spin key if not spin polarized calculation
                self.bwdf[Spin.down] = np.array(down_values, dtype=float)

            # Bin width is taken as the spacing between the first two centers
            self.bin_width = np.diff(self.centers)[0]
